Hive Snippets

Dump of Hive Table

hive -e ‘select * from cand_sr.cust_sr_all_1202_bank’ | sed ‘s/[\t]/,/g’  > /users/phodisvc/cust_sr_all_1202_bank.csv

 

Sequence in Hive

SELECT CASE WHEN tab.seq is null THEN 0 ELSE tab.seq END AS sequence FROM (SELECT MAX(run_id) AS seq FROM cand_sr.RUN_ID_SEQUENCE) tab

–#– RUN_ID_SEQUENCE
CREATE TABLE IF NOT EXISTS cand_sr.RUN_ID_SEQUENCE(RUN_ID    BIGINT)
ROW FORMAT SERDE ‘org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe’;

SELECT MAX(rowid) AS sequence FROM (SELECT run_id, ROW_NUMBER() OVER (ORDER BY run_id) AS rowid FROM cand_sr.RUN_ID_SEQUENCE) tab

 

Insert into Hive (one-time values)

–#– cand_sr_note_status_master
CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_status_master(
STATUS_ID    INT,
STATUS_VALUE    STRING)
ROW FORMAT SERDE ‘org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe’;

INSERT INTO TABLE cand_sr.cand_sr_note_status_master SELECT 1, ‘start’ FROM cand_sr.dual;
INSERT INTO TABLE cand_sr.cand_sr_note_status_master SELECT 2, ‘done’ FROM cand_sr.dual;
INSERT INTO TABLE cand_sr.cand_sr_note_status_master SELECT 3, ‘fail’ FROM cand_sr.dual;
INSERT INTO TABLE cand_sr.cand_sr_note_status_master SELECT 4, ‘json’ FROM cand_sr.dual;

 

Insert into Partition Table (using Staging Table)

–#– cand_sr_note_contract_detail
CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_contract_detail(
GUID    STRING,
CONTRACT_NUMBER    STRING)
PARTITIONED BY (RUN_ID    BIGINT)
ROW FORMAT SERDE ‘org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe’;

CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_contract_detail_tmp(
RUN_ID    BIGINT,
GUID    STRING,
CONTRACT_NUMBER    STRING)
ROW FORMAT SERDE ‘org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe’;

INSERT into table cand_sr.cand_sr_note_contract_detail PARTITION (RUN_ID = ?) select GUID, CONTRACT_NUMBER from cand_sr.cand_sr_note_contract_detail_tmp WHERE RUN_ID = ?

 

Insert into Partition Table (slow performance)

–#– cand_sr_note_hostname
CREATE TABLE IF NOT EXISTS cand_sr.cand_sr_note_hostname(
GUID STRING,
CONTRACT_NUMBER    STRING,
INCIDENT_NUMBER    STRING,
OPEN_STATUS    INT,
TYPE INT,
VALUE STRING,
IS_VALIDATED INT)
PARTITIONED BY (RUN_ID    BIGINT)
ROW FORMAT SERDE ‘org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe’;

insert into table cand_sr.cand_sr_note_hostname PARTITION (RUN_ID = ?) select ?,?,?,?,? from cand_sr.dual

 

Word Count with Hive

DROP TABLE IF EXISTS WORD_COUNT;
CREATE TABLE WORD_COUNT (LINE STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ‘\n’ STORED AS TEXTFILE;
LOAD DATA LOCAL INPATH ‘/users/phodisvc/sample.txt’ OVERWRITE INTO TABLE WORD_COUNT;
SELECT WORD, COUNT (*) FROM WORD_COUNT LATERAL VIEW EXPLODE (SPLIT (LINE, ‘ ‘)) LTABLE AS WORD GROUP BY WORD;

Leave a comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.