[donghua@cdh-vm temp]$ hcat -e "desc employees.departments"
dept_no string
dept_name string
[donghua@cdh-vm temp]$ hcat -e "desc employees.dept_manager"
emp_no int
dept_no string
from_date string
to_date string
[donghua@cdh-vm temp]$ hcat -e "desc employees.dept_emp"
emp_no int
dept_no string
from_date string
to_date string
[donghua@cdh-vm temp]$ hcat -e "desc employees.employees"
emp_no int
birth_date string
first_name string
last_name string
gender string
hire_date string
-- Find out their manager name & department size
select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
from employees.departments d
join employees.dept_manager dm on d.dept_no = dm.dept_no
join employees.employees m on dm.emp_no = m.emp_no
join employees.dept_emp de on d.dept_no = de.dept_no
join employees.employees e on de.emp_no = e.emp_no
where de.to_date >'2018-01-01'
and dm.to_date > '2018-01-01'
group by d.dept_name, concat(m.first_name,' ',m.last_name)
order by d.dept_name;
Run SQL in Hive:
Connecting to jdbc:hive2://cdh-vm.dbaglobe.com:10000/employees
Connected to: Apache Hive (version 1.1.0-cdh5.14.0)
Driver: Hive JDBC (version 1.1.0-cdh5.14.0)
Transaction isolation: TRANSACTION_REPEATABLE_READ
Beeline version 1.1.0-cdh5.14.0 by Apache Hive
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/emp> -- Find out their manager name & department size
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/emp> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
. . . . . . . . . . . . . . . . . . . . . . .> from employees.departments d
. . . . . . . . . . . . . . . . . . . . . . .> join employees.dept_manager dm on d.dept_no = dm.dept_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.employees m on dm.emp_no = m.emp_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.dept_emp de on d.dept_no = de.dept_no
. . . . . . . . . . . . . . . . . . . . . . .> join employees.employees e on de.emp_no = e.emp_no
. . . . . . . . . . . . . . . . . . . . . . .> where de.to_date >'2018-01-01'
. . . . . . . . . . . . . . . . . . . . . . .> and dm.to_date > '2018-01-01'
. . . . . . . . . . . . . . . . . . . . . . .> group by d.dept_name, concat(m.first_name,' ',m.last_name)
. . . . . . . . . . . . . . . . . . . . . . .> order by d.dept_name;
+---------------------+--------------------+------------+--+
| d.dept_name | manager | employees |
+---------------------+--------------------+------------+--+
| Customer Service | Yuchang Weedman | 17569 |
| Development | Leon DasSarma | 61386 |
| Finance | Isamu Legleitner | 12437 |
| Human Resources | Karsten Sigstam | 12898 |
| Marketing | Vishwani Minakawa | 14842 |
| Production | Oscar Ghazalie | 53304 |
| Quality Management | Dung Pesch | 14546 |
| Research | Hilary Kambil | 15441 |
| Sales | Hauke Zhang | 37701 |
+---------------------+--------------------+------------+--+
9 rows selected (100.528 seconds)
Run SQL in Impala:
Connected to cdh-vm.dbaglobe.com:21000
Server version: impalad version 2.11.0-cdh5.14.0 RELEASE (build d68206561bce6b26762d62c01a78e6cd27aa7690)
***********************************************************************************
Welcome to the Impala shell.
(Impala Shell v2.11.0-cdh5.14.0 (d682065) built on Sat Jan 6 13:27:16 PST 2018)
Press TAB twice to see a list of available commands.
***********************************************************************************
[cdh-vm.dbaglobe.com:21000] > -- Find out their manager name & department size
> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
> from employees.departments d
> join employees.dept_manager dm on d.dept_no = dm.dept_no
> join employees.employees m on dm.emp_no = m.emp_no
> join employees.dept_emp de on d.dept_no = de.dept_no
> join employees.employees e on de.emp_no = e.emp_no
> where de.to_date >'2018-01-01'
> and dm.to_date > '2018-01-01'
> group by d.dept_name, concat(m.first_name,' ',m.last_name)
> order by d.dept_name;
Query: -- Find out their manager name & department size
select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
from employees.departments d
join employees.dept_manager dm on d.dept_no = dm.dept_no
join employees.employees m on dm.emp_no = m.emp_no
join employees.dept_emp de on d.dept_no = de.dept_no
join employees.employees e on de.emp_no = e.emp_no
where de.to_date >'2018-01-01'
and dm.to_date > '2018-01-01'
group by d.dept_name, concat(m.first_name,' ',m.last_name)
order by d.dept_name
Query submitted at: 2018-02-18 20:58:51 (Coordinator: http://cdh-vm.dbaglobe.com:25000)
Query progress can be monitored at: http://cdh-vm.dbaglobe.com:25000/query_plan?query_id=a04e8317637c0e4a:a83017f00000000
+--------------------+-------------------+-----------+
| dept_name | manager | employees |
+--------------------+-------------------+-----------+
| Customer Service | Yuchang Weedman | 17569 |
| Development | Leon DasSarma | 61386 |
| Finance | Isamu Legleitner | 12437 |
| Human Resources | Karsten Sigstam | 12898 |
| Marketing | Vishwani Minakawa | 14842 |
| Production | Oscar Ghazalie | 53304 |
| Quality Management | Dung Pesch | 14546 |
| Research | Hilary Kambil | 15441 |
| Sales | Hauke Zhang | 37701 |
+--------------------+-------------------+-----------+
Fetched 9 row(s) in 19.43s
Run SQL in Mysql:
Server version: 5.5.56-MariaDB MariaDB Server
Copyright (c) 2000, 2017, Oracle, MariaDB Corporation Ab and others.
Type 'help;' or '\h' for help. Type '\c' to clear the current input statement.
MariaDB [employees]> -- Find out their manager name & department size
MariaDB [employees]> select d.dept_name, concat(m.first_name,' ',m.last_name) as manager, count(e.emp_no) employees
-> from employees.departments d
-> join employees.dept_manager dm on d.dept_no = dm.dept_no
-> join employees.employees m on dm.emp_no = m.emp_no
-> join employees.dept_emp de on d.dept_no = de.dept_no
-> join employees.employees e on de.emp_no = e.emp_no
-> where de.to_date >'2018-01-01'
-> and dm.to_date > '2018-01-01'
-> group by d.dept_name, concat(m.first_name,' ',m.last_name)
-> order by d.dept_name;
+--------------------+-------------------+-----------+
| dept_name | manager | employees |
+--------------------+-------------------+-----------+
| Customer Service | Yuchang Weedman | 17569 |
| Development | Leon DasSarma | 61386 |
| Finance | Isamu Legleitner | 12437 |
| Human Resources | Karsten Sigstam | 12898 |
| Marketing | Vishwani Minakawa | 14842 |
| Production | Oscar Ghazalie | 53304 |
| Quality Management | Dung Pesch | 14546 |
| Research | Hilary Kambil | 15441 |
| Sales | Hauke Zhang | 37701 |
+--------------------+-------------------+-----------+
9 rows in set (1.62 sec)
Re-write SQL in Pig-Latin:
-- pig script
-- Find out department size and their manager name
d0 = LOAD 'employees.departments' USING org.apache.hive.hcatalog.pig.HCatLoader();
dm0 = LOAD 'employees.dept_manager' USING org.apache.hive.hcatalog.pig.HCatLoader();
de0 = LOAD 'employees.dept_emp' USING org.apache.hive.hcatalog.pig.HCatLoader();
e0 = LOAD 'employees.employees' USING org.apache.hive.hcatalog.pig.HCatLoader();
d1 = FOREACH d0 GENERATE dept_no, dept_name;
dm1 = FOREACH (FILTER dm0 BY to_date >'2018-01-01') GENERATE dept_no, emp_no;
de1 = FOREACH (FILTER de0 by to_date >'2018-01-01') GENERATE dept_no, emp_no;
e1 = FOREACH e0 GENERATE emp_no, CONCAT(first_name,' ',last_name) AS fullname;
d1_dm1 = JOIN d1 BY dept_no, dm1 BY dept_no;
d1_dm1_e1 = JOIN d1_dm1 BY emp_no, e1 BY emp_no;
dept_mgr = FOREACH d1_dm1_e1 GENERATE d1_dm1::d1::dept_no AS dept_no, d1_dm1::d1::dept_name AS dept_name, e1::fullname AS manager;
d1_de1 = JOIN d1 BY dept_no, de1 BY dept_no;
d1_de1_e1 = FOREACH (JOIN d1_de1 BY emp_no, e1 BY emp_no) GENERATE d1_de1::d1::dept_no,e1::emp_no;
dept_emp_count = FOREACH (GROUP d1_de1_e1 BY dept_no) GENERATE group AS dept_no, COUNT(d1_de1_e1) AS employees;
dept_info_0 = JOIN dept_mgr BY dept_no, dept_emp_count BY dept_no;
dept_info_1 = FOREACH dept_info_0 GENERATE dept_mgr::dept_name AS dept_name, dept_mgr::manager AS manager, dept_emp_count::employees AS employees;
dept_info_2 = ORDER dept_info_1 BY dept_name;
DUMP dept_info_2;
[donghua@cdh-vm temp]$ date;pig -4 log4j.properties emp.pig;date;
Sun Feb 18 22:09:44 +08 2018
(Customer Service,Yuchang Weedman,17569)
(Development,Leon DasSarma,61386)
(Finance,Isamu Legleitner,12437)
(Human Resources,Karsten Sigstam,12898)
(Marketing,Vishwani Minakawa,14842)
(Production,Oscar Ghazalie,53304)
(Quality Management,Dung Pesch,14546)
(Research,Hilary Kambil,15441)
(Sales,Hauke Zhang,37701)
Sun Feb 18 22:16:08 +08 2018
No comments:
Post a Comment