-- 准备工作:用 UE 打开 CSV 文件,去掉表头,把双引号(“)全部替换为空,然后保存为 csv 格式。
-- Session setup for the import/export steps that follow.
-- Each statement sits on its own line: a "--" comment runs to the end of the
-- physical line, so statements placed after an inline comment would be ignored.

-- Switch to the working database.
-- NOTE(review): USER looks like a placeholder for the real-name user's own
-- database - replace it with the actual database name and confirm with the author.
use USER;

-- Choose the MapReduce job queue. Both assignments write the same property,
-- so the later one (queue_gbd_ide_01) is the value left in effect.
-- NOTE(review): presumably only one of the two queues is needed - confirm which.
set mapred.job.queue.name=queue_0901_01;
set mapred.job.queue.name=queue_gbd_ide_01;

-- Next step: create the tables.
-- Recreate the staging table hc_temp_1: six string columns (a-f), stored as a
-- text file using the delimited row format with a comma as the field terminator.
drop table if exists hc_temp_1;
create table if not exists hc_temp_1 (
    a string,
    b string,
    c string,
    d string,
    e string,
    f string
)
row format delimited
    fields terminated by ","
stored as textfile;

-- For large data volumes, export in separate parts as done below.
-- Recreate hc_temp_2 as a copy of columns a-f from USER.zan_cj_ucp_20160113
-- plus a bucket column used to export the data in parts.
-- split_col = pmod(abs(hash(openid)), 5), so each row falls into one of the buckets 0-4.
-- NOTE(review): the original note says a modulus of 5 means exporting 6 files, but
-- pmod(..., 5) yields only the 5 values 0-4 - confirm the intended number of files.
-- Advice from the original note: finish exporting one part before starting the next.
drop table if exists hc_temp_2;
create table if not exists hc_temp_2 as
select
    a,
    b,
    c,
    d,
    e,
    f,
    pmod(abs(hash(openid)), 5) as split_col
from USER.zan_cj_ucp_20160113;
-- Export one bucket of hc_temp_2 (here split_col = 0).

-- Queue selection: both assignments write the same property,
-- so the later one (queue_gbd_ide_01) is the value left in effect.
set mapred.job.queue.name=queue_0901_01;
set mapred.job.queue.name=queue_gbd_ide_01;

-- Row count of the bucket. count(1) without a group by returns a single row,
-- so the limit does not change this result.
select count(1)
from hc_temp_2
where split_col = 0
limit 1000000;

-- Rows of the bucket, all columns (split_col included), capped at 1000000 rows.
-- NOTE(review): presumably rerun with the other split_col values for the
-- remaining parts - confirm with the author.
select *
from hc_temp_2
where split_col = 0
limit 1000000;