Sample data files (from the "Learning Apache Pig" working files, Chapter 2):
https://resources.oreilly.com/examples/0636920047704/blob/master/Learning%20Apache%20Pig%20-%20Working%20Files/Chapter%202/cities_small.txt
https://resources.oreilly.com/examples/0636920047704/blob/master/Learning%20Apache%20Pig%20-%20Working%20Files/Chapter%202/states.txt
[donghua@cdh-vm temp]$ pig -4 log4j.properties
grunt> cities = load 'cities_small.txt' as (name:chararray,state:chararray,pop:int);
grunt> aliases;
grunt> describe cities
cities: {name: chararray,state: chararray,pop: int}
grunt> \de cities
cities: {name: chararray,state: chararray,pop: int}
grunt> ca_cities = filter cities by (state=='CA');
grunt> dump ca_cities;
grunt> \d ca_cities
grunt> illustrate;
(South Gate,CA,96640)
--------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
--------------------------------------------------------------------
| | South Gate | CA | 96640 |
--------------------------------------------------------------------
grunt> illustrate;
(Fresno,CA,476050)
--------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
--------------------------------------------------------------------
| | Fresno | CA | 476050 |
--------------------------------------------------------------------
grunt> ordered_cities = order cities by pop desc;
grunt> states = load 'states.txt' as (rank:int,code:chararray,fullname:chararray,date_entered:chararray,year_entered:int);
grunt> cities_join_states = join cities by state, states by code;
grunt> illustrate cities_join_states;
(Fargo,ND,93531)
(39,ND,North Dakota,02-NOV,1889)
--------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
--------------------------------------------------------------------
| | Fargo | ND | 93531 |
| | Fargo | ND | 93531 |
--------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------
| states | rank:int | code:chararray | fullname:chararray | date_entered:chararray | year_entered:int |
--------------------------------------------------------------------------------------------------------------------------
| | 39 | ND | North Dakota | 02-NOV | 1889 |
| | 39 | ND | North Dakota | 02-NOV | 1889 |
--------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| cities_join_states | cities::name:chararray | cities::state:chararray | cities::pop:int | states::rank:int | states::code:chararray | states::fullname:chararray | states::date_entered:chararray | states::year_entered:int |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
| | Fargo | ND | 93531 | 39 | ND | North Dakota | 02-NOV | 1889 |
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
grunt> cities_join_states_short = foreach cities_join_states generate cities::name, states::fullname;
grunt> store cities_join_states_short into 'cities_join_states_short';
grunt> fs -ls cities_join_states_short
grunt> fs -cat cities_join_states_short/part-r-00000
grunt> cities_join_states_short = foreach (join cities by state, states by code) generate cities::name, states::fullname;
grunt> city_and_state = foreach cities generate name,state,pop*1.5;
grunt> cities_by_state = group cities by state;
grunt> \de cities_by_state;
cities_by_state: {group: chararray,cities: {(name: chararray,state: chararray,pop: int)}}
grunt> illustrate cities_by_state;
(Sioux Falls,SD,154997)
-----------------------------------------------------------------------
| cities | name:chararray | state:chararray | pop:int |
-----------------------------------------------------------------------
| | Sioux Falls | SD | 154997 |
| | Rapid City | SD | 65491 |
-----------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------
| cities_by_state | group:chararray | cities:bag{:tuple(name:chararray,state:chararray,pop:int)} |
------------------------------------------------------------------------------------------------------------------------------
| | SD | {(Sioux Falls, SD, 154997), (Rapid City, SD, 65491)} |
------------------------------------------------------------------------------------------------------------------------------
grunt> total_cities = foreach (group cities all) generate COUNT(cities);
grunt> \d total_cities;
(500)
grunt> cities_by_state = foreach (group cities by state) generate group, COUNT(cities);
grunt> \d cities_by_state;
grunt> cities_by_state = foreach (group cities by state parallel 3) generate group, COUNT(cities);
grunt> store cities_by_state into 'cities_by_state';
grunt> fs -ls cities_by_state
Found 4 items
-rw-r--r-- 1 donghua supergroup 0 2018-02-17 22:25 cities_by_state/_SUCCESS
-rw-r--r-- 1 donghua supergroup 113 2018-02-17 22:25 cities_by_state/part-r-00000
-rw-r--r-- 1 donghua supergroup 82 2018-02-17 22:25 cities_by_state/part-r-00001
-rw-r--r-- 1 donghua supergroup 86 2018-02-17 22:25 cities_by_state/part-r-00002
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment