# 文章作者:Tyan 博客:noahsnail.com | 简书
# pandas is a data-analysis library built on top of numpy.
#
# This script walks through the basics of ``pandas.Series``: construction,
# selection, assignment, arithmetic, index alignment and missing values.
#
# NOTE: the "Output" comments reproduce the results recorded in the original
# write-up (Python 2, older pandas). Row order for dict-built Series is shown
# alphabetically there; newer pandas versions are documented to keep the
# dict's insertion order instead, so the order you see may differ.
import numpy as np
import pandas as pd

# A Series is a one-dimensional data structure.

# Build a Series from a list (a default integer index is generated).
series1 = pd.Series([3, 5, 'test', -5, 0.3])
print(series1)
# Output:
# 0       3
# 1       5
# 2    test
# 3      -5
# 4     0.3
# dtype: object

# Build a Series from a list plus an explicit index.
series2 = pd.Series([3, 5, 'test', -5, 0.3], index=['A', 'B', 'C', 'D', 'E'])
print(series2)
# Output:
# A       3
# B       5
# C    test
# D      -5
# E     0.3
# dtype: object

# Build a Series from a dict: keys become the index, values the data.
companies = {'Baidu': 400, 'Alibaba': 500, 'Tecent': 600, 'Jingdong': 300}
series3 = pd.Series(companies)
print(series3)
# Output:
# Alibaba     500
# Baidu       400
# Jingdong    300
# Tecent      600
# dtype: int64

# --- Selecting data from a Series ---

# Select a single element by index label.
print(series3['Baidu'])
# Select several elements by passing a list of labels.
print(series3[['Baidu', 'Tecent']])
# Output:
# 400
# Baidu     400
# Tecent    600
# dtype: int64

# Select elements by condition.
print(series3[series3 < 500])
# Output:
# Baidu       400
# Jingdong    300
# dtype: int64

# How conditional selection works: the comparison yields a boolean Series,
# which is then used as a mask.
print(series3 < 500)
temp = series3 < 500
print(series3[temp])
# Output:
# Alibaba     False
# Baidu        True
# Jingdong     True
# Tecent      False
# dtype: bool
# Baidu       400
# Jingdong    300
# dtype: int64

# --- Assigning to Series elements ---

# Assign to a single element.
print('old value: ', series3['Baidu'])
series3['Baidu'] = 450
print('new value: ', series3['Baidu'])
# Output:
# old value:  400
# new value:  450

# Assign by condition: every element below 500 is set to 500.
print('old series: ')
print(series3)
series3[series3 < 500] = 500
print('new series: ')
print(series3)
# Output (as recorded; it shows Baidu as 400 although the assignment above
# set it to 450, so when run top to bottom expect Baidu 450 in "old series"):
# old series:
# Alibaba     500
# Baidu       400
# Jingdong    300
# Tecent      600
# dtype: int64
# new series:
# Alibaba     500
# Baidu       500
# Jingdong    500
# Tecent      600
# dtype: int64

# --- Arithmetic on a Series (applied element-wise) ---
print('Division: ')
print(series3 / 2)
print('Square: ')
print(series3 ** 2)
print(np.square(series3))
# Output:
# Division:
# Alibaba     250.0
# Baidu       250.0
# Jingdong    250.0
# Tecent      300.0
# dtype: float64
# Square:
# Alibaba     250000
# Baidu       250000
# Jingdong    250000
# Tecent      360000
# dtype: int64
# Alibaba     250000
# Baidu       250000
# Jingdong    250000
# Tecent      360000
# dtype: int64

# Define a new Series: head count per company.
people = {'Baidu': 50000, 'Alibaba': 45000, 'Tecent': 60000,
          'Jingdong': 80000, 'Netease': 30000}
series4 = pd.Series(people)
print(series4)
# Output:
# Alibaba     45000
# Baidu       50000
# Jingdong    80000
# Netease     30000
# Tecent      60000
# dtype: int64

# Adding two Series aligns them by index label. series3 has no 'Netease'
# entry, so the result for that label is NaN.
print(series3 + series4)
# Output:
# Alibaba     45500.0
# Baidu       50500.0
# Jingdong    80500.0
# Netease         NaN
# Tecent      60600.0
# dtype: float64

# Check whether a label is present in the index.
print('Netease' in series3)
print('Baidu' in series3)
# Output:
# False
# True

# Find the elements that are null / not null.
result = series3 + series4
print(result.notnull())
print(result.isnull())
print(result[result.isnull()])
# notnull() is the direct mask for "not missing"; it replaces the original
# `result.isnull() != True` comparison and selects the same rows.
print(result[result.notnull()])
# Output:
# Alibaba      True
# Baidu        True
# Jingdong     True
# Netease     False
# Tecent       True
# dtype: bool
# Alibaba     False
# Baidu       False
# Jingdong    False
# Netease      True
# Tecent      False
# dtype: bool
# Netease   NaN
# dtype: float64
# Alibaba     45500.0
# Baidu       50500.0
# Jingdong    80500.0
# Tecent      60600.0
# dtype: float64