Solution - Partitioning using NYSE Data

As part of this topic we will see the solution for this exercise.

CREATE DATABASE training_nyse;
CREATE TABLE nyse_eod_stage (
  stockticker STRING,
  tradedate INT,
  openprice FLOAT,
  highprice FLOAT,
  lowprice FLOAT,
  closeprice FLOAT,
  volume BIGINT 
) ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS textfile;

LOAD DATA LOCAL INPATH '/data/nyse_all/nyse_data'
  INTO TABLE nyse_eod_stage;

SELECT * FROM nyse_eod_stage LIMIT 10;
SELECT count(1) FROM nyse_eod_stage;

CREATE TABLE nyse_eod_part (
  stockticker STRING,
  tradedate INT,
  openprice FLOAT,
  highprice FLOAT,
  lowprice FLOAT,
  closeprice FLOAT,
  volume BIGINT 
)  PARTITIONED BY (tradeyear INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS textfile;

SET hive.exec.dynamic.partition.mode=nonstrict;

INSERT INTO TABLE nyse_eod_part PARTITION (tradeyear)
SELECT t.*, substring(tradedate, 1, 4) AS tradeyear FROM nyse_eod_stage t;

-- We can also use below query
INSERT INTO TABLE nyse_eod_part
SELECT t.*, substring(tradedate, 1, 4) AS tradeyear FROM nyse_eod_stage t;

Practice hive on state of the art Big Data cluster - https://labs.itversity.com
You can sign up for our courses on Udemy using $10 coupons - Udemy Coupons - Big Data Courses