思路: 利用xlrd读取execl的数据,然后load到hive表中
# -*- coding: utf-8 -*- import sys import os from imp import reload import xlrd from pypinyin import lazy_pinyin import time reload(sys) sys.setdefaultencoding('utf8') print('########################## execl的文件路径是:'+sys.argv[1]) print('########################## 将要生成的表名是:'+sys.argv[2]) #输入的xls文件 book = xlrd.open_workbook(sys.argv[1]) in_file = book.sheets()[0] #输出的数据文件 out_file_str = '/home/inf/zhangjishuai/execl_to_hive/data/' + sys.argv[2] +'--'+str(sys.argv[1]).replace('/','_')+str(time.time()) out_file = open(out_file_str,'wt') #生成表头 title_List = in_file.row_values(0) title_str = ','.join(title_List[i] for i in range(len(title_List))) #将汉字转成拼音 title_str1 = ''.join(lazy_pinyin(title_str)) title_str2 = title_str1.replace('(','_').replace(')','_') title = str('create table if not exists ' +sys.argv[2] + '(' +title_str2.replace(',',' string,')+\ ' string)'+' row format delimited fields terminated by ' + "'"'\\t'"'" ) #建表语句 table_sql =str('hive -e' + ' "' +title + '"') #生成数据文件 ncol = in_file.ncols nrow = in_file.nrows for i in range(1,nrow): text = [] for j in range(ncol): if in_file.cell(i,j).ctype == 3: year, month, day, hour, minute, second = xlrd.xldate_as_tuple(in_file.cell(i, j).value, book.datemode) date = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(time.mktime((year, month, day, hour, minute, second,0,0,0))+28800)) text.append(str(date)) else: text.append(in_file.cell(i,j).value) line = '\t'.join(str(text[i]) for i in range(len(text))) + '\n' out_file.write(line) out_file.close() #load语句 load = str('load data local inpath ' + "'" + out_file_str + "'" + ' overwrite into table '+sys.argv[2]) load_sql = str('hive -e' + ' "' +load + '"') #执行命令 os.system(table_sql) os.system(load_sql) print('########################## 表' + sys.argv[2] + '加载成功')