pandas 第1课

xiaoxiao2021-02-28  149

How do I read a tabular data file into pandas?

In [2]: import pandas as pd

In [6]: orders = pd.read_table('http://bit.ly/chiporders') #需要翻墙

In [8]: orders.head()

Out[8]:
   order_id  quantity  item_name                              choice_description                                item_price
0  1         1         Chips and Fresh Tomato Salsa           NaN                                               $2.39
1  1         1         Izze                                   [Clementine]                                      $3.39
2  1         1         Nantucket Nectar                       [Apple]                                           $3.39
3  1         1         Chips and Tomatillo-Green Chili Salsa  NaN                                               $2.39
4  2         2         Chicken Bowl                           [Tomatillo-Red Chili Salsa (Hot), [Black Beans…   $16.98

In [11]: pd.read_table('http://bit.ly/movieusers')

Out[11]: 1|24|M|technician|8571102|53|F|other|9404313|23|M|writer|3206724|24|M|technician|4353735|33|F|other|1521346|42|M|executive|9810157|57|M|administrator|9134468|36|M|administrator|0520179|29|M|student|01002810|53|M|lawyer|90703911|39|F|other|303291012|28|F|other|064051113|47|M|educator|292061214|45|M|scientist|551061315|49|F|educator|973011416|21|M|entertainment|103091517|30|M|programmer|063551618|35|F|other|372121719|40|M|librarian|021381820|42|F|homemaker|956601921|26|M|writer|300682022|25|M|writer|402062123|30|F|artist|481972224|21|F|artist|945332325|39|M|engineer|551072426|49|M|engineer|210442527|40|F|librarian|300302628|32|M|writer|553692729|41|M|programmer|940432830|7|M|student|554362931|24|M|artist|10003……912914|44|F|other|08105913915|50|M|entertainment|60614914916|27|M|engineer|N2L5N915917|22|F|student|20006916918|40|M|scientist|70116917919|25|M|other|14216918920|30|F|artist|90008919921|20|F|student|98801920922|29|F|administrator|21114921923|21|M|student|E2E3R922924|29|M|other|11753923925|18|F|salesman|49036924926|49|M|entertainment|01701925927|23|M|programmer|55428926928|21|M|student|55408927929|44|M|scientist|53711928930|28|F|scientist|07310929931|60|M|educator|33556930932|58|M|educator|06437931933|28|M|student|48105932934|61|M|engineer|22902933935|42|M|doctor|66221934936|24|M|other|32789935937|48|M|educator|98072936938|38|F|technician|55038937939|26|F|student|33319938940|32|M|administrator|02215939941|20|M|student|97229940942|48|F|librarian|78209941943|22|M|student|77841

942 rows × 1 columns

In [15]: pd.read_table('http://bit.ly/movieusers',sep='|') # 将数据按|进行分隔

Out[15]: 124Mtechnician857110253Fother940431323Mwriter320672424Mtechnician435373533Fother152134642Mexecutive981015757Madministrator913446836Madministrator052017929Mstudent0100281053Mlawyer9070391139Fother30329101228Fother06405111347Meducator29206121445Mscientist55106131549Feducator97301141621Mentertainment10309151730Mprogrammer06355161835Fother37212171940Mlibrarian02138182042Fhomemaker95660192126Mwriter30068202225Mwriter40206212330Fartist48197222421Fartist94533232539Mengineer55107242649Mengineer21044252740Flibrarian30030262832Mwriter55369272941Mprogrammer9404328307Mstudent55436293124Martist10003………………91291444Fother0810591391550Mentertainment6061491491627MengineerN2L5N91591722Fstudent2000691691840Mscientist7011691791925Mother1421691892030Fartist9000891992120Fstudent9880192092229Fadministrator2111492192321MstudentE2E3R92292429Mother1175392392518Fsalesman4903692492649Mentertainment0170192592723Mprogrammer5542892692821Mstudent5540892792944Mscientist5371192893028Fscientist0731092993160Meducator3355693093258Meducator0643793193328Mstudent4810593293461Mengineer2290293393542Mdoctor6622193493624Mother3278993593748Meducator9807293693838Ftechnician5503893793926Fstudent3331993894032Madministrator0221593994120Mstudent9722994094248Flibrarian7820994194322Mstudent77841

942 rows × 5 columns

In [18]: pd.read_table('http://bit.ly/movieusers',sep='|',header=None) # 告诉pandas第一行不是表头

Out[18]: 012340124Mtechnician857111253Fother940432323Mwriter320673424Mtechnician435374533Fother152135642Mexecutive981016757Madministrator913447836Madministrator052018929Mstudent0100291053Mlawyer90703101139Fother30329111228Fother06405121347Meducator29206131445Mscientist55106141549Feducator97301151621Mentertainment10309161730Mprogrammer06355171835Fother37212181940Mlibrarian02138192042Fhomemaker95660202126Mwriter30068212225Mwriter40206222330Fartist48197232421Fartist94533242539Mengineer55107252649Mengineer21044262740Flibrarian30030272832Mwriter55369282941Mprogrammer9404329307Mstudent55436………………91391444Fother0810591491550Mentertainment6061491591627MengineerN2L5N91691722Fstudent2000691791840Mscientist7011691891925Mother1421691992030Fartist9000892092120Fstudent9880192192229Fadministrator2111492292321MstudentE2E3R92392429Mother1175392492518Fsalesman4903692592649Mentertainment0170192692723Mprogrammer5542892792821Mstudent5540892892944Mscientist5371192993028Fscientist0731093093160Meducator3355693193258Meducator0643793293328Mstudent4810593393461Mengineer2290293493542Mdoctor6622193593624Mother3278993693748Meducator9807293793838Ftechnician5503893893926Fstudent3331993994032Madministrator0221594094120Mstudent9722994194248Flibrarian7820994294322Mstudent77841

943 rows × 5 columns

In [22]: # 给pandas添加表头
user_cols = ['user_id','age','gender','occupation','zip_code']  # occupation n. 职业;zip code 邮编
pd.read_table('http://bit.ly/movieusers',sep='|',header=None, names=user_cols )  # header=None 告诉pandas第一行不是表头,names 指定列名

Out[22]: user_idagegenderoccupationzip_code0124Mtechnician857111253Fother940432323Mwriter320673424Mtechnician435374533Fother152135642Mexecutive981016757Madministrator913447836Madministrator052018929Mstudent0100291053Mlawyer90703101139Fother30329111228Fother06405121347Meducator29206131445Mscientist55106141549Feducator97301151621Mentertainment10309161730Mprogrammer06355171835Fother37212181940Mlibrarian02138192042Fhomemaker95660202126Mwriter30068212225Mwriter40206222330Fartist48197232421Fartist94533242539Mengineer55107252649Mengineer21044262740Flibrarian30030272832Mwriter55369282941Mprogrammer9404329307Mstudent55436………………91391444Fother0810591491550Mentertainment6061491591627MengineerN2L5N91691722Fstudent2000691791840Mscientist7011691891925Mother1421691992030Fartist9000892092120Fstudent9880192192229Fadministrator2111492292321MstudentE2E3R92392429Mother1175392492518Fsalesman4903692592649Mentertainment0170192692723Mprogrammer5542892792821Mstudent5540892892944Mscientist5371192993028Fscientist0731093093160Meducator3355693193258Meducator0643793293328Mstudent4810593393461Mengineer2290293493542Mdoctor6622193593624Mother3278993693748Meducator9807293793838Ftechnician5503893893926Fstudent3331993994032Madministrator0221594094120Mstudent9722994194248Flibrarian7820994294322Mstudent77841

943 rows × 5 columns

In [23]: # 给pandas添加表头
user_cols = ['user_id','age','gender','occupation','zip_code']  # occupation n. 职业;zip code 邮编
users = pd.read_table('http://bit.ly/movieusers',sep='|',header=None, names=user_cols )  # header=None 告诉pandas第一行不是表头,names 指定列名

In [25]: users.head()

Out[25]:
   user_id  age  gender  occupation  zip_code
0  1        24   M       technician  85711
1  2        53   F       other       94043
2  3        23   M       writer      32067
3  4        24   M       technician  43537
4  5        33   F       other       15213

In [ ]:

转载请注明原文地址: https://www.6miu.com/read-35888.html

最新回复(0)