Q) How to create an external table in Hive with partition and ORC file format
create external table defaultsearch_rc(
bcookie string
,intl string
,src_spaceid string
,calday string
,prmpt int
,setyh int
,device_type string
,src string
,pn string
,hspart string
,hsimp string
,tsrc string
,vtestid string
,mtestid string
)
partitioned by (date_id int)
clustered by (src) into 4 buckets
row format delimited fields Terminated By '\001'
stored as rcfile location '/projects/CommerceFeed/default_search'
;
create external table defaultsearch_rc(
bcookie string
,intl string
,src_spaceid string
,calday string
,prmpt int
,setyh int
,device_type string
,src string
,pn string
,hspart string
,hsimp string
,tsrc string
,vtestid string
,mtestid string
)
partitioned by (date_id int)
clustered by (src) into 4 buckets
row format delimited fields Terminated By '\001'
stored as rcfile location '/projects/CommerceFeed/default_search'
;
alter table defaultsearch_rc add partition (date_id=20160801) location '/projects/CommerceFeed/default_search/20160801';
alter table defaultsearch_rc add partition (date_id=20160802) location '/projects/CommerceFeed/default_search/20160802';
Q) How to generate an ad-hoc report
Let's write Hive query in a file 'defaultSearchReport.txt'.
defaultSearchReport.txt:
----------------
use venkat_db;
select intl, src_spaceid, calday, count(*) as total_events, sum(prmpt) as prmpt, sum(setyh) as setyh
from defaultsearch_rc
where date_id=20160801
group by intl, src_spaceid, calday ;
----------------
hive -f defaultSearchReport.txt > output
cat output | awk -F'\t' '{ print $1 "," $2 "," $3 "," $4 "," $5 "," $6 }' > default_search_report_20160801.csv
No comments:
Post a Comment