Think it as HASH based indexes in RDBMS, more suitable for high cardinanity data columns (e.g.: customer_id, product_id, station_id, etc)
Basic Bucket example:
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create table monthly_taxi_fleet6
. . . . . . . . . . . . . . . . . . . . . . .> (month char(7),fleet smallint,company varchar(50))
. . . . . . . . . . . . . . . . . . . . . . .> clustered by (company) into 3 buckets
. . . . . . . . . . . . . . . . . . . . . . .> stored as avro;
Example using Apache Hive version 1.1.0-cdh5.13.1, hive.enforce.bucketing=false by default
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet6
. . . . . . . . . . . . . . . . . . . . . . .> select month,fleet,company from monthly_taxi_fleet;
[donghua@cdh-vm ~]$ hdfs dfs -ls -R /user/hive/warehouse/monthly_taxi_fleet6
-rwxrwxrwt 1 donghua hive 25483 2017-12-26 10:40 /user/hive/warehouse/monthly_taxi_fleet6/000000_0
-- hive.enforce.bucketing: Whether bucketing is enforced. If true, while inserting into the table, bucketing is enforced.
-- Default Value: Hive 0.x: false, Hive 1.x: false, Hive 2.x: removed, which effectively makes it always true (HIVE-12331)
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> set hive.enforce.bucketing=true;
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet6
. . . . . . . . . . . . . . . . . . . . . . .> select month,fleet,company from monthly_taxi_fleet;
[donghua@cdh-vm ~]$ hdfs dfs -ls -R /user/hive/warehouse/monthly_taxi_fleet6
-rwxrwxrwt 1 donghua hive 13611 2017-12-26 10:43 /user/hive/warehouse/monthly_taxi_fleet6/000000_0
-rwxrwxrwt 1 donghua hive 6077 2017-12-26 10:43 /user/hive/warehouse/monthly_taxi_fleet6/000001_0
-rwxrwxrwt 1 donghua hive 6589 2017-12-26 10:43 /user/hive/warehouse/monthly_taxi_fleet6/000002_0
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> describe extended monthly_taxi_fleet6;
+-----------------------------+----------------------------------------------------+----------+--+
| col_name | data_type | comment |
+-----------------------------+----------------------------------------------------+----------+--+
| month | char(7) | |
| fleet | int | |
| company | varchar(50) | |
| | NULL | NULL |
| Detailed Table Information | Table(tableName:monthly_taxi_fleet6, dbName:default, owner:donghua, createTime:1514256031, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:month, type:char(7), comment:null), FieldSchema(name:fleet, type:smallint, comment:null), FieldSchema(name:company, type:varchar(50), comment:null)], location:hdfs://cdh-vm.dbaglobe.com:8020/user/hive/warehouse/monthly_taxi_fleet6, inputFormat:org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat, compressed:false, numBuckets:3, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.avro.AvroSerDe, parameters:{serialization.format=1}), bucketCols:[company], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{totalSize=26277, numRows=1128, rawDataSize=0, COLUMN_STATS_ACCURATE=true, numFiles=3, transient_lastDdlTime=1514256192}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE) | |
+-----------------------------+----------------------------------------------------+----------+--+
5 rows selected (0.075 seconds)
Advanced Bucket example: Partition + Bucketing + Sorted by
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> create table monthly_taxi_fleet7
. . . . . . . . . . . . . . . . . . . . . . .> (month char(7),fleet smallint)
. . . . . . . . . . . . . . . . . . . . . . .> partitioned by (company varchar(50))
. . . . . . . . . . . . . . . . . . . . . . .> clustered by (month) sorted by (month)into 3 buckets
. . . . . . . . . . . . . . . . . . . . . . .> stored as avro;
0: jdbc:hive2://cdh-vm.dbaglobe.com:10000/def> insert into monthly_taxi_fleet7
. . . . . . . . . . . . . . . . . . . . . . .> partition (company)
. . . . . . . . . . . . . . . . . . . . . . .> select month,fleet,company from monthly_taxi_fleet;
[donghua@cdh-vm ~]$ hdfs dfs -ls -R /user/hive/warehouse/monthly_taxi_fleet7
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=CityCab
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=CityCab/000000_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=CityCab/000001_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=CityCab/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Comfort
-rwxrwxrwt 1 donghua hive 913 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Comfort/000000_0
-rwxrwxrwt 1 donghua hive 913 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Comfort/000001_0
-rwxrwxrwt 1 donghua hive 913 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Comfort/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Individual Yellow- Top
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Individual Yellow- Top/000000_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Individual Yellow- Top/000001_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Individual Yellow- Top/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Premier
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Premier/000000_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Premier/000001_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Premier/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Prime
-rwxrwxrwt 1 donghua hive 765 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Prime/000000_0
-rwxrwxrwt 1 donghua hive 765 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Prime/000001_0
-rwxrwxrwt 1 donghua hive 766 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Prime/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=SMRT
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=SMRT/000000_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=SMRT/000001_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=SMRT/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Smart
-rwxrwxrwt 1 donghua hive 720 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Smart/000000_0
-rwxrwxrwt 1 donghua hive 719 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Smart/000001_0
-rwxrwxrwt 1 donghua hive 719 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=Smart/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=TransCab
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=TransCab/000000_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=TransCab/000001_0
-rwxrwxrwt 1 donghua hive 865 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=TransCab/000002_0
drwxrwxrwt - donghua hive 0 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=YTC
-rwxrwxrwt 1 donghua hive 432 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=YTC/000000_0
-rwxrwxrwt 1 donghua hive 432 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=YTC/000001_0
-rwxrwxrwt 1 donghua hive 432 2017-12-26 11:05 /user/hive/warehouse/monthly_taxi_fleet7/company=YTC/000002_0
No comments:
Post a Comment