942 rows × 1 columns
In [15]: pd.read_table('http://bit.ly/movieusers',sep='|') # 将数据按|进行分隔 Out[15]: 124Mtechnician857110253Fother940431323Mwriter320672424Mtechnician435373533Fother152134642Mexecutive981015757Madministrator913446836Madministrator052017929Mstudent0100281053Mlawyer9070391139Fother30329101228Fother06405111347Meducator29206121445Mscientist55106131549Feducator97301141621Mentertainment10309151730Mprogrammer06355161835Fother37212171940Mlibrarian02138182042Fhomemaker95660192126Mwriter30068202225Mwriter40206212330Fartist48197222421Fartist94533232539Mengineer55107242649Mengineer21044252740Flibrarian30030262832Mwriter55369272941Mprogrammer9404328307Mstudent55436293124Martist10003………………91291444Fother0810591391550Mentertainment6061491491627MengineerN2L5N91591722Fstudent2000691691840Mscientist7011691791925Mother1421691892030Fartist9000891992120Fstudent9880192092229Fadministrator2111492192321MstudentE2E3R92292429Mother1175392392518Fsalesman4903692492649Mentertainment0170192592723Mprogrammer5542892692821Mstudent5540892792944Mscientist5371192893028Fscientist0731092993160Meducator3355693093258Meducator0643793193328Mstudent4810593293461Mengineer2290293393542Mdoctor6622193493624Mother3278993593748Meducator9807293693838Ftechnician5503893793926Fstudent3331993894032Madministrator0221593994120Mstudent9722994094248Flibrarian7820994194322Mstudent77841942 rows × 5 columns
In [18]: pd.read_table('http://bit.ly/movieusers',sep='|',header=None) # 告诉pandas第一行不是表头 Out[18]: 012340124Mtechnician857111253Fother940432323Mwriter320673424Mtechnician435374533Fother152135642Mexecutive981016757Madministrator913447836Madministrator052018929Mstudent0100291053Mlawyer90703101139Fother30329111228Fother06405121347Meducator29206131445Mscientist55106141549Feducator97301151621Mentertainment10309161730Mprogrammer06355171835Fother37212181940Mlibrarian02138192042Fhomemaker95660202126Mwriter30068212225Mwriter40206222330Fartist48197232421Fartist94533242539Mengineer55107252649Mengineer21044262740Flibrarian30030272832Mwriter55369282941Mprogrammer9404329307Mstudent55436………………91391444Fother0810591491550Mentertainment6061491591627MengineerN2L5N91691722Fstudent2000691791840Mscientist7011691891925Mother1421691992030Fartist9000892092120Fstudent9880192192229Fadministrator2111492292321MstudentE2E3R92392429Mother1175392492518Fsalesman4903692592649Mentertainment0170192692723Mprogrammer5542892792821Mstudent5540892892944Mscientist5371192993028Fscientist0731093093160Meducator3355693193258Meducator0643793293328Mstudent4810593393461Mengineer2290293493542Mdoctor6622193593624Mother3278993693748Meducator9807293793838Ftechnician5503893893926Fstudent3331993994032Madministrator0221594094120Mstudent9722994194248Flibrarian7820994294322Mstudent77841943 rows × 5 columns
In [22]: #给pandas添加表头 user_cols = ['user_id','age','gender','occupation','zip_code'] #occupation n. 职业 zip code 邮编 pd.read_table('http://bit.ly/movieusers',sep='|',header=None, names=user_cols ) # 告诉pandas第一行不是表头 Out[22]: user_idagegenderoccupationzip_code0124Mtechnician857111253Fother940432323Mwriter320673424Mtechnician435374533Fother152135642Mexecutive981016757Madministrator913447836Madministrator052018929Mstudent0100291053Mlawyer90703101139Fother30329111228Fother06405121347Meducator29206131445Mscientist55106141549Feducator97301151621Mentertainment10309161730Mprogrammer06355171835Fother37212181940Mlibrarian02138192042Fhomemaker95660202126Mwriter30068212225Mwriter40206222330Fartist48197232421Fartist94533242539Mengineer55107252649Mengineer21044262740Flibrarian30030272832Mwriter55369282941Mprogrammer9404329307Mstudent55436………………91391444Fother0810591491550Mentertainment6061491591627MengineerN2L5N91691722Fstudent2000691791840Mscientist7011691891925Mother1421691992030Fartist9000892092120Fstudent9880192192229Fadministrator2111492292321MstudentE2E3R92392429Mother1175392492518Fsalesman4903692592649Mentertainment0170192692723Mprogrammer5542892792821Mstudent5540892892944Mscientist5371192993028Fscientist0731093093160Meducator3355693193258Meducator0643793293328Mstudent4810593393461Mengineer2290293493542Mdoctor6622193593624Mother3278993693748Meducator9807293793838Ftechnician5503893893926Fstudent3331993994032Madministrator0221594094120Mstudent9722994194248Flibrarian7820994294322Mstudent77841943 rows × 5 columns
In [23]:
#给pandas添加表头
user_cols = ['user_id','age','gender','occupation','zip_code'] #occupation n. 职业 zip code 邮编
users = pd.read_table('http://bit.ly/movieusers',sep='|',header=None, names=user_cols ) # 告诉pandas第一行不是表头

In [25]: users.head()
Out[25]:
   user_id  age gender  occupation zip_code
0        1   24      M  technician    85711
1        2   53      F       other    94043
2        3   23      M      writer    32067
3        4   24      M  technician    43537
4        5   33      F       other    15213

In [ ]: