Performance
YSmart outperforms the current SQL-to-MapReduce translators because of its unique correlation-aware feature. To illustrate this advantage, an experiment has been conducted to compare the performance of Hive and YSmart.
- CPU:a quad-core Intel Xeon X3220 processor (2.4 GHz)
- Memory: 4GB of RAM
- Hard Disk: a 500GB hard disk
- Software: Ubuntu 11.04 and hadoop-0.20.204.0
- Hadoop with 4 mappers and 2 reducers
- Hive patched with correlation aware ability on
- Hive patched with correlation aware ability off
- YSmart with correlation aware ability on
- YSmart with correlation aware ability off
- TPC-H Q17, Q18 and Q21, whose subqueries have been unnested before testing.
- TPC-H Q17
- TPC-H Q18
- TPC-H Q21
- The result shows that the performance of both Hive and YSmart have been improved significantly with correlation aware ability.
Test environment
System configuration
Test queries
select
sum(l_extendedprice) / 7.0 as avg_yearly
from
(
select
l_partkey, l_quantity, l_extendedprice
from
lineitem,
part
where
p_partkey = l_partkey
and p_brand = 'Brand#34'
and p_container = 'MED PACK'
)touter,
(
select
l_partkey as lp, 0.2 * avg(l_quantity) as lq
from
lineitem
group by
l_partkey
) tinner
where
touter.l_partkey = tinner.lp
and touter.l_quantity < tinner.lq;
select
c_name,c_custkey,o_orderkey,o_orderdate,o_totalprice,sum(l_quantity)
from
(
select
o_custkey,o_orderkey,o_orderdate,o_totalprice,l_quantity,t_sum_quantity
from
(
select
o_custkey,o_orderkey,o_orderdate,o_totalprice,l_quantity
from
orders oo,
lineitem l
where
l.l_orderkey = oo.o_orderkey
) tt1,
(
select
l_orderkey,sum(l_quantity) as t_sum_quantity
from
lineitem
group by
l_orderkey
) tt2
where
tt1.o_orderkey = tt2.l_orderkey
and tt2.t_sum_quantity > 300
) p,
customer cc
where
cc.c_custkey = p.o_custkey
group by
c_name,c_custkey,o_orderkey,o_orderdate,o_totalprice
order by
o_totalprice desc, o_orderdate;
select
s_name,count(*) as numwait
from
supplier,
nation,
(
select
sq12.l_suppkey as ls
from
(
select
sq1.l_orderkey, sq1.l_suppkey
from
(
select
l_orderkey, l_suppkey
from
lineitem,
orders
where
o_orderkey = l_orderkey
and l_receiptdate > l_commitdate
and o_orderstatus = 'F'
) sq1,
(
select
l_orderkey, count_distinct(l_suppkey) as count_suppkey, max(l_suppkey) as max_suppkey
from
lineitem
group by
l_orderkey
) sq2
where
sq1.l_orderkey = sq2.l_orderkey
and ((sq2.count_suppkey > 1)
or ((sq2.count_suppkey=1) and (sq1.l_suppkey <> sq2.max_suppkey)))
) sq12
left outer join
(
select
l_orderkey, count_distinct(l_suppkey) as count_suppkey, max(l_suppkey) as max_suppkey
from
lineitem
where
l_receiptdate > l_commitdate
group by
l_orderkey
) sq3
on
sq12.l_orderkey = sq3.l_orderkey
where
(sq3.count_suppkey is null)
or ((sq3.count_suppkey=1) and (sq12.l_suppkey = sq3.max_suppkey))
) complex
where
s_suppkey = complex.ls
and s_nationkey = n_nationkey
and n_name = 'IRAQ'
group by
s_name
order by
numwait desc, s_name;
Result