Python Script:
[donghua@cdh-vm ~]$ cat employees.py
import sys
def format_employee(line):
    """Turn one tab-separated employee record into one output row.

    Args:
        line: A string of the form ``employeeid<TAB>firstname<TAB>lastname``,
            with or without a trailing newline.

    Returns:
        ``employeeid<TAB>firstname,lastname`` terminated by exactly one
        newline.

    Raises:
        ValueError: If the line does not contain exactly three
            tab-separated fields.
    """
    # Drop the line terminator first so it never ends up inside lastname,
    # and so a final input line without '\n' still yields a complete row.
    employeeid, firstname, lastname = line.rstrip('\n').split('\t')
    return employeeid + '\t' + firstname + ',' + lastname + '\n'


def main():
    """Read records from stdin and write the reformatted rows to stdout."""
    for line in sys.stdin:
        # sys.stdout.write is used instead of print so that the only
        # newline emitted is the one format_employee appends.
        sys.stdout.write(format_employee(line))


if __name__ == '__main__':
    main()
Hive Script:
-- Demo table holding the raw name parts; data files are stored as Avro.
CREATE TABLE employees (
    employee_id INT,
    first_name  STRING,
    last_name   STRING
)
STORED AS AVRO;

-- Seed three sample rows.
INSERT INTO employees
VALUES
    (1, 'donghua', 'luo'),
    (2, 'larry', 'elison'),
    (3, 'tom', 'kyte');

-- Register the script as a session resource so the TRANSFORM below can
-- invoke it by its bare file name.
ADD FILE /tmp/employees.py;

-- Pipe each row through employees.py: the script splits its input on tabs
-- and writes back employee_id plus "first,last" joined into one field.
SELECT TRANSFORM (employee_id, first_name, last_name)
    USING 'python employees.py'
    AS (employee_id, full_name)
FROM employees;
Sample output:
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/d> select transform(employee_id,first_name,last_name) using 'python employees.py' as (employee_id,full_name) from employees;
+--------------+---------------+--+
| employee_id  | full_name     |
+--------------+---------------+--+
| 1            | donghua,luo   |
| 2            | larry,elison  |
| 3            | tom,kyte      |
+--------------+---------------+--+
3 rows selected (17.997 seconds)
No comments:
Post a Comment