Set the required properties from the Hive command line:
-- Session-level settings applied before creating the compressed table.
-- `set <property>;` with no value only prints the current setting.
hive> set mapreduce.job.reduces;
mapreduce.job.reduces=6
-- Minimum input split size (presumably in bytes, i.e. ~64 MB -- confirm against the Hadoop docs).
hive> set mapreduce.input.fileinputformat.split.minsize=64000000;
-- Turn on compression for intermediate data and for final query output.
hive> set hive.exec.compress.intermediate=true;
hive> set hive.exec.compress.output=true;
-- Use gzip as the codec for both the job output and the map output.
hive> set mapreduce.output.fileoutputformat.compress.codec=org.apache.hadoop.io.compress.GzipCodec;
hive> set mapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.GzipCodec;
-- List the tables in the current database.
hive> show tables;
create table script:
-- External table over the pipe-delimited card files under
-- /user/nagellarajashyam/deck_cards.
-- The column list declares three string columns; card_no is included so the
-- schema matches the three columns that `describe formatted` reports for the
-- table created from this one via `select *`.
hive> create external table largedeck_external
> (card_name string,card_type string,card_no string)
> row format delimited fields terminated by '|'
> location '/user/nagellarajashyam/deck_cards';
-- Copy the external deck into a managed, comma-delimited table (CTAS).
-- With hive.exec.compress.output=true and the GzipCodec output codec set in
-- this session, the resulting data files are written as .gz.
-- Columns are listed explicitly so the target schema does not silently
-- change if the source table's column list changes.
hive> create table largedeck_compress
> row format delimited fields terminated by ','
> as select card_name, card_type, card_no
> from largedeck_external;
-- List the warehouse directory that backs largedeck_compress to inspect the
-- data files written by the CTAS (names and sizes in bytes).
hive> dfs -ls hdfs://nn01.itversity.com:8020/apps/hive/warehouse/nagellarajashyam.db/largedeck_compress;
Found 3 items
-rwxrwxrwx 3 nagellarajashyam hdfs 1301604 2016-12-22 01:54 hdfs://nn01.itversity.com:8020/apps/hive/warehouse/nagellarajashyam.db/largedeck_compress/000000_0.gz
-rwxrwxrwx 3 nagellarajashyam hdfs 1301602 2016-12-22 01:54 hdfs://nn01.itversity.com:8020/apps/hive/warehouse/nagellarajashyam.db/largedeck_compress/000001_0.gz
-rwxrwxrwx 3 nagellarajashyam hdfs 920323 2016-12-22 01:54 hdfs://nn01.itversity.com:8020/apps/hive/warehouse/nagellarajashyam.db/largedeck_compress/000002_0.gz
-- Show the column definitions plus detailed table metadata (location, table
-- type, table parameters, storage information) for largedeck_compress.
-- NOTE(review): the output reports `Compressed: No` although the listed data
-- files end in .gz; presumably this field reflects table metadata rather
-- than the codec actually used for the files -- confirm.
hive> describe formatted largedeck_compress;
OK
col_name data_type comment
card_name string
card_type string
card_no string
Detailed Table Information
Database: nagellarajashyam
Owner: nagellarajashyam
CreateTime: Thu Dec 22 01:54:29 EST 2016
LastAccessTime: UNKNOWN
Protect Mode: None
Retention: 0
Location: hdfs://nn01.itversity.com:8020/apps/hive/warehouse/nagellarajashyam.db/largedeck_compress
Table Type: MANAGED_TABLE
Table Parameters:
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
numFiles 3
numRows 54525952
rawDataSize 672137216
totalSize 3523529
transient_lastDdlTime 1482389669
Storage Information
SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Compressed: No
Num Buckets: -1
Bucket Columns: []
Sort Columns: []
Storage Desc Params:
field.delim ,
serialization.format ,
Time taken: 0.175 seconds, Fetched: 34 row(s)