pandas是基于numpy构建的数据处理库。
pandas的数据结构介绍
Series
In [3]: import numpy as np
   ...: from pandas import Series,DataFrame
In [4]: obj = Series([2,-4,5,7])
In [5]: obj
Out[5]:
0 2
1 -4
2 5
3 7
dtype: int64
In [6]: obj.values
Out[6]: array([ 2, -4, 5, 7], dtype=int64)
In [7]: obj.index
Out[7]: RangeIndex(start=0, stop=4, step=1)
In [8]: obj2 = Series([4,-2,-7,6],index = ['a','b','c','d'])
In [9]: obj2
Out[9]:
a 4
b -2
c -7
d 6
dtype: int64
In [10]: obj2.index
Out[10]: Index(['a', 'b', 'c', 'd'], dtype='object')
In [11]: obj2['d']
Out[11]: 6
In [13]: obj2[['d','a','c']]
Out[13]:
d 6
a 4
c -7
dtype: int64
In [14]: obj2[obj2 > 0]
Out[14]:
a 4
d 6
dtype: int64
In [15]: obj2 * 2
Out[15]:
a 8
b -4
c -14
d 12
dtype: int64
In [16]: np.exp(obj2)
Out[16]:
a 54.598150
b 0.135335
c 0.000912
d 403.428793
dtype: float64
In [17]: 'b' in obj2
Out[17]: True
In [18]: 'e' in obj2
Out[18]: False
In [19]: sdata = {'Ohio' : 122, 'Texas': 7000,'Oregon': 16000, 'Utah':999}
In [20]: obj3 = Series(sdata)
In [21]: obj3
Out[21]:
Ohio 122
Oregon 16000
Texas 7000
Utah 999
dtype: int64
In [22]: states = ['Utah', 'Texas', 'California', 'Ohio']
In [23]: obj4 = Series(sdata, index=states)
In [24]: obj4
Out[24]:
Utah 999.0
Texas 7000.0
California NaN
Ohio 122.0
dtype: float64
In [25]: obj4.name = 'population'
In [26]: obj4.index.name = 'state'
In [27]: obj4
Out[27]:
state
Utah 999.0
Texas 7000.0
California NaN
Ohio 122.0
Name: population, dtype: float64