create table products(prod_id int, name string, price double, last_modified timestamp) stored as parquet;
create table orders(order_id int, prod_id int, quantity int, order_date timestamp) stored as parquet;
insert into products values(1,'Soap',1.0,'2017-01-01');
insert into products values(1,'Soap',1.1,'2017-04-01');
insert into products values(1,'Soap',1.2,'2017-07-01');
insert into products values(1,'Soap',1.3,'2017-10-01');
insert into products values(2,'Soda',0.6,'2017-01-01');
insert into products values(2,'Soda',0.8,'2017-06-01');
insert into products values(3,'Beer',4.2,'2017-07-01');
insert into orders values(1,1,1,'2017-02-10');
insert into orders values(2,2,10,'2017-07-10');
insert into orders values(3,3,5,'2017-09-10');
insert into orders values(4,1,2,'2017-12-01');
create view prod_scd2 as
select prod_id,price,last_modified as valid_from,
lead(last_modified) over (partition by prod_id order by last_modified) as valid_end
from products;
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000> select * from prod_scd2 p;
+------------+----------+------------------------+------------------------+--+
| p.prod_id | p.price | p.valid_from | p.valid_end |
+------------+----------+------------------------+------------------------+--+
| 1 | 1.0 | 2017-01-01 00:00:00.0 | 2017-04-01 00:00:00.0 |
| 1 | 1.1 | 2017-04-01 00:00:00.0 | 2017-07-01 00:00:00.0 |
| 1 | 1.2 | 2017-07-01 00:00:00.0 | 2017-10-01 00:00:00.0 |
| 1 | 1.3 | 2017-10-01 00:00:00.0 | NULL |
| 2 | 0.6 | 2017-01-01 00:00:00.0 | 2017-06-01 00:00:00.0 |
| 2 | 0.8 | 2017-06-01 00:00:00.0 | NULL |
| 3 | 4.2 | 2017-07-01 00:00:00.0 | NULL |
+------------+----------+------------------------+------------------------+--+
7 rows selected (20.253 seconds)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000> select * from orders o;
+-------------+------------+-------------+------------------------+--+
| o.order_id | o.prod_id | o.quantity | o.order_date |
+-------------+------------+-------------+------------------------+--+
| 1 | 1 | 1 | 2017-02-10 00:00:00.0 |
| 2 | 2 | 10 | 2017-07-10 00:00:00.0 |
| 3 | 3 | 5 | 2017-09-10 00:00:00.0 |
| 4 | 1 | 2 | 2017-12-01 00:00:00.0 |
+-------------+------------+-------------+------------------------+--+
4 rows selected (0.152 seconds)
select o.order_id,o.order_date,p.price,o.quantity*p.price as total_cost
from orders o join prod_scd2 p
on (o.prod_id=p.prod_id)
where o.order_date between p.valid_from and if(p.valid_end is not null ,p.valid_end,cast('9999-12-31' as timestamp))
order by order_id;
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000> select o.order_id,o.order_date,p.price,o.quantity*p.price as total_cost
. . . . . . . . . . . . . . . . . . . . .> from orders o join prod_scd2 p
. . . . . . . . . . . . . . . . . . . . .> on (o.prod_id=p.prod_id)
. . . . . . . . . . . . . . . . . . . . .> where o.order_date between p.valid_from and if(p.valid_end is not null ,p.valid_end,cast('9999-12-31' as timestamp))
. . . . . . . . . . . . . . . . . . . . .> order by order_id;
+-------------+------------------------+----------+-------------+--+
| o.order_id | o.order_date | p.price | total_cost |
+-------------+------------------------+----------+-------------+--+
| 1 | 2017-02-10 00:00:00.0 | 1.0 | 1.0 |
| 2 | 2017-07-10 00:00:00.0 | 0.8 | 8.0 |
| 3 | 2017-09-10 00:00:00.0 | 4.2 | 21.0 |
| 4 | 2017-12-01 00:00:00.0 | 1.3 | 2.6 |
+-------------+------------------------+----------+-------------+--+
4 rows selected (48.862 seconds)
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment