YSmart outperforms the current SQL-to-MapReduce translators because of its unique correlation-aware feature. To illustrate this advantage, an experiment has been conducted to compare the performance of Hive and YSmart.
- CPU:a quad-core Intel Xeon X3220 processor (2.4 GHz)
- Memory: 4GB of RAM
- Hard Disk: a 500GB hard disk
- Software: Ubuntu 11.04 and hadoop-0.20.204.0
- Hadoop with 4 mappers and 2 reducers
- Hive patched with correlation aware ability on
- Hive patched with correlation aware ability off
- YSmart with correlation aware ability on
- YSmart with correlation aware ability off
- TPC-H Q17, Q18 and Q21, whose subqueries have been unnested before testing.
- TPC-H Q17
- TPC-H Q18
- TPC-H Q21
- The result shows that the performance of both Hive and YSmart have been improved significantly with correlation aware ability.
select sum(l_extendedprice) / 7.0 as avg_yearly from ( select l_partkey, l_quantity, l_extendedprice from lineitem, part where p_partkey = l_partkey and p_brand = 'Brand#34' and p_container = 'MED PACK' )touter, ( select l_partkey as lp, 0.2 * avg(l_quantity) as lq from lineitem group by l_partkey ) tinner where touter.l_partkey = tinner.lp and touter.l_quantity < tinner.lq;
select c_name,c_custkey,o_orderkey,o_orderdate,o_totalprice,sum(l_quantity) from ( select o_custkey,o_orderkey,o_orderdate,o_totalprice,l_quantity,t_sum_quantity from ( select o_custkey,o_orderkey,o_orderdate,o_totalprice,l_quantity from orders oo, lineitem l where l.l_orderkey = oo.o_orderkey ) tt1, ( select l_orderkey,sum(l_quantity) as t_sum_quantity from lineitem group by l_orderkey ) tt2 where tt1.o_orderkey = tt2.l_orderkey and tt2.t_sum_quantity > 300 ) p, customer cc where cc.c_custkey = p.o_custkey group by c_name,c_custkey,o_orderkey,o_orderdate,o_totalprice order by o_totalprice desc, o_orderdate;
select s_name,count(*) as numwait from supplier, nation, ( select sq12.l_suppkey as ls from ( select sq1.l_orderkey, sq1.l_suppkey from ( select l_orderkey, l_suppkey from lineitem, orders where o_orderkey = l_orderkey and l_receiptdate > l_commitdate and o_orderstatus = 'F' ) sq1, ( select l_orderkey, count_distinct(l_suppkey) as count_suppkey, max(l_suppkey) as max_suppkey from lineitem group by l_orderkey ) sq2 where sq1.l_orderkey = sq2.l_orderkey and ((sq2.count_suppkey > 1) or ((sq2.count_suppkey=1) and (sq1.l_suppkey <> sq2.max_suppkey))) ) sq12 left outer join ( select l_orderkey, count_distinct(l_suppkey) as count_suppkey, max(l_suppkey) as max_suppkey from lineitem where l_receiptdate > l_commitdate group by l_orderkey ) sq3 on sq12.l_orderkey = sq3.l_orderkey where (sq3.count_suppkey is null) or ((sq3.count_suppkey=1) and (sq12.l_suppkey = sq3.max_suppkey)) ) complex where s_suppkey = complex.ls and s_nationkey = n_nationkey and n_name = 'IRAQ' group by s_name order by numwait desc, s_name;