python3脚本使用sql loader批量导入字节文件并二次处理

xiaoxiao2021-02-28  58

由于使用cx_oracle连接oracle然后导入数据文件速度太慢(要导的数据有几百G),出于速度的考虑,选择了oracle自带的sql loader工具,使用python脚本写界面,与用户交互,然后使用system()去批量调用sql loader,对于导入失败的bad数据以及日志数据,我用了c++写了一个程序,把bad文件转换成了ok的数据,然后又写了一个脚本处理这些bad数据。 1、python3调用sqlldr工具导入数据的脚本实现。

#!C:\Python34\python.exe ''' by dairen 20170703 ''' from tkinter import * import tkinter.messagebox as messagebox import cx_Oracle import time import datetime import os class Application(Frame): def __init__(self, master=None): Frame.__init__(self, master,height=2000,width=4000) self.pack() self.createWidgets() def createWidgets(self): self.label_1 = Label(self,text = "文件起始日期:") self.label_2 = Label(self,text = "文件结束日期:") self.label_1.grid(row = 0,column = 0) self.label_2.grid(row = 1,column = 0) self.Input1 = Entry(self) self.Input2 = Entry(self) self.Input1.grid(row = 0,column = 1) self.Input2.grid(row = 1,column = 1) self.Button1 = Button(self, text='开始导入',width=20, command=self.runmain) self.Button1.grid(row =2,columnspan=2) def runmain(self): begin_date = self.Input1.get() or '20170607' end_date = self.Input2.get() or '20170607' messagebox.showinfo('Tips!', '您的输入是:%s 和 %s \n 导入即将开始!' %(begin_date,end_date)) #offset:358O b_Info 40-60 aaa=[' CHAR(42)',' CHAR(42)',' CHAR(42)',' CHAR(8)',' CHAR(3)', ' CHAR(1)',' CHAR(1)',' CHAR(1)',' CHAR(1)',' CHAR(1)', ' CHAR(6)',' CHAR(10)',' CHAR(11)',' CHAR(11)',' CHAR(11)', ' CHAR(11)',' CHAR(11)',' CHAR(21)',' CHAR(21)',' CHAR(21)', ' CHAR(4)',' CHAR(6)',' CHAR(12)',' CHAR(4)',' CHAR(6)', ' CHAR(4)',' CHAR(3)',' CHAR(2)',' CHAR(12)',' CHAR(6)', ' CHAR(8)',' CHAR(15)',' CHAR(65)',' CHAR(3)',' CHAR(4)', ' CHAR(6)',' CHAR(10)',' CHAR(1)',' CHAR(1)',' CHAR(5)' ,' CHAR(2)',' CHAR(2)',' CHAR(2)',' CHAR(2)',' CHAR(4)' ,' CHAR(4)',' CHAR(11)',' CHAR(11)',' NUMBER(12,0)',' NUMBER(12,0)' ,' NUMBER(12,0)',' NUMBER(12,0)',' CHAR(3)',' CHAR(3)',' NUMBER(8,0)' ,' CHAR(1)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)' ,' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)' ,' NUMBER(8,0)',' CHAR(8)',' CHAR(8)',' CHAR(8)',' NUMBER(8,0)' ,' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)' ,' NUMBER(8,0)',' NUMBER(8,0)',' NUMBER(8,0)',' CHAR(16)',' CHAR(2)' ,' CHAR(1)',' CHAR(2)',' CHAR(14)',' CHAR(4)',' CHAR(3)' ,' CHAR(9)',' NUMBER(12,0)',' CHAR(3)',' CHAR(1)',' CHAR(8)' ,' CHAR(1)',' CHAR(12)',' CHAR(15)',' CHAR(6)',' CHAR(4)' ,' CHAR(100)'] lenlist=[42,42,42,8,3,1,1,1,1,1, 6,10,11,11,11,11,11,21,21,21, 4,6,12,4,6,4,3,2,12,6, 8,15,40,3,4,6,10,1,1,5, 2,2,2,2,4,4,11,11,12,12, 12,12,3,3,8,1,8,8,8,8, 8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,16,2, 1,2,14,4,3,9,12,3,1,8, 1,12,15,6,4,100] title=[ 'Main_Key','Origin_Key','Er_Key','PostDate','Curr', 'If_Curr','If_Cen','Cups_Sett','If_Cvm_In','If_Single', 'Ssn_Trans','Time_Trans','Ob_Code','Trans_Code','Rec_Code', 'Cb_Code','Cvm_Code','Account11','Account_In','Account_Out', 'MsgType','ProcCode','TranAmt','ActDate','ActTime', 'MerChant_Type','Sevr_Point_In','Sevr_Point_Con','Search','AuthCode', 'Term','Ob_Id','Ob_Info','Curr_Tran','Reason_Code', 'Ssn_Trans_Ori','Time_Trans_Ori','Status_Snd','Status_Rec','Status_Trans', 'RejCode1','RejCode2','RejCode3','RejCode4','Sett_Snd_AreaCode', 'Sett_Rec_AreaCode','Sett_Snd_Code','Sett_Rec_Code' ,'Amt_Snd_Sett_dr','Amt_Snd_Sett_cr', 'Amt_Rec_Sett_dr','Amt_Rec_Sett_cr','Curr_Sett_Snd','Curr_Sett_Rec','Comm', 'Comm_Way','Comm_Snd_dr','Comm_Snd_cr','Comm_Rec_dr','Comm_Rec_cr', 'Fee_Snd_dr','Fee_Snd_cr','Fee_Rec_dr','Fee_Rec_cr','Comm_Cen_dr', 'Comm_Cen_cr','Fee_Snd_Branch_dr','Fee_Snd_Branch_cr','Fee_Rec_Branch_dr','Fee_Rec_Branch_cr', 'Fee_CanCel_Snd_dr','Fee_CanCel_Snd_cr','Fee_CanCel_Rec_dr','Fee_CanCel_Rec_cr','Fee_CanCel_Cen_dr', 'Fee_CanCel_Cen_cr','Fee_Cl_Snd_Branch_dr','Fee_Cl_Snd_Branch_cr','Fee_Cl_Rec_Branch_dr','Fee_Cl_Rec_Branch_cr', 'TranType','CardKind','CardType','Bin','Card', 'Er_Max','Ssn_Cen','TranAmt_Ori','TranCode_Ori','Reserved', 'variab1','variab2','variab3','variab4','variab5','variab6' ] x=len(title) fields=[] fields = [title[i]+aaa[i] for i in range(0,x)] fields_str = ', '.join(fields) #begin_date=input("input start date(格式:20170506):") #end_date=input("input end date(格式:20170606):") date_list = [] begin_date = datetime.datetime.strptime(begin_date, "%Y%m%d") end_date = datetime.datetime.strptime(end_date, "%Y%m%d") if (begin_date > end_date): messagebox.showinfo('error!', 'begin_date or end_date error!)') while( begin_date <= end_date): date_str = begin_date.strftime("%Y%m%d") date_list.append(date_str) begin_date += datetime.timedelta(days=1) #print(date_list) file_name_list=[] numlist=['01','02','03','04'] for datename in date_list: for num in numlist: bsi_str='BSI%s01_%s'%(datename,num) file_name_list.append(bsi_str) for num in numlist: bsi_str='BSI%s51_%s'%(datename,num) file_name_list.append(bsi_str) file_name_list.append('BSI%s88_01'
转载请注明原文地址: https://www.6miu.com/read-1000223.html

最新回复(0)