Numpy and Pandas 用法整理

    numpy属性

import numpy as np

# Build a 2-D array (a matrix) from nested Python lists.
array = np.array([
    [1, 2, 3],
    [2, 3, 4],
])
print(array)
# Report the basic attributes, one labelled line each.
print('number of dim:', array.ndim)  # number of dimensions
print("shape:", array.shape)  # (rows, columns)
print("size:", array.size)  # total number of elements

[[1 2 3][2 3 4]]
number of dim: 2
shape: (2, 3)
size: 6

numpy创建array

import numpy as np

# Create a 1-D array with an explicit floating-point dtype.
# The `np.float` alias no longer exists in current NumPy; the builtin
# `float` selects float64, which is what the alias used to mean.
a = np.array([2, 23, 4], dtype=float)
print(a.dtype)  # element type

a = np.array([[2, 23, 4], [2, 23, 4]])  # 2-D array from nested lists
print(a)

a = np.zeros((3, 4))  # 3x4 array filled with zeros
a = np.arange(10, 20, 2)  # stepped sequence: 10, 12, ..., 18
np.arange(12).reshape((3, 4))  # reshape returns a new 3x4 array (result unused here)
print(a)

a = np.linspace(1, 10, 5)  # 5 evenly spaced points from 1 to 10 (4 equal intervals)
print(a)

float64
[[ 2 23  4][ 2 23  4]]
[10 12 14 16 18]
[ 1.    3.25  5.5   7.75 10.  ]

numpy的基础运算

import numpy as np

a = np.array([10, 20, 30, 40])
b = np.arange(4)
print(a, b)
# Arithmetic on arrays is applied element by element.
c = a - b  # difference
c = b ** 2  # square
c = 10 * np.tan(a)  # tangent (input in radians), scaled by 10
print(c)

[10 20 30 40] [0 1 2 3]
[  6.48360827  22.37160944 -64.05331197 -11.17214931]

import numpy as np

a = np.array([10, 20, 30, 40])
b = np.arange(4)
# Comparisons are evaluated per element and give boolean arrays:
# first "less than 3", then "equal to 3".
for mask in (b < 3, b == 3):
    print(mask)

[ True  True  True False]
[False False False  True]

# element-wise product vs. matrix product
import numpy as np

a = np.array([[1, 1],
              [0, 1]])
b = np.arange(4).reshape(2, 2)

c = a * b  # `*` multiplies matching elements
print(c)

c_dot = a.dot(b)  # matrix multiplication
print(c_dot)

[[0 1][0 3]]
[[2 4][2 3]]

# sum / min / max along an axis
import numpy as np

a = np.random.random((2, 4))  # 2x4 array of random floats
print(a)
# axis=1 reduces across each row; np.sum(a, axis=1) and np.max(a, axis=1)
# take the same argument.
a = a.min(axis=1)
print(a)

[[0.35129081 0.6504631  0.2607104  0.49290976][0.35479875 0.33376722 0.4280792  0.85249914]]
[0.2607104  0.33376722]

numpy 基础运算2

import numpy as np

a = np.arange(2, 14).reshape((3, 4))
print(a)
# Apply each summary function to the whole array, in order:
#   argmin  - index of the smallest value
#   average - mean value
#   mean    - mean value
#   median  - median value
#   diff    - first-order difference between neighbouring elements of each row
for stat in (np.argmin, np.average, np.mean, np.median, np.diff):
    print(stat(a))

[[ 2  3  4  5][ 6  7  8  9][10 11 12 13]]
0
7.5
7.5
7.5
[[1 1 1][1 1 1][1 1 1]]

import numpy as np

a = np.arange(14, 2, -1).reshape((3, 4))
print(a)
print(np.sort(a))  # sort within each row
print(a.T)  # transpose
print(a.clip(5, 9))  # values above 9 become 9, values below 5 become 5
print(a.mean(axis=0))  # mean of each column

[[14 13 12 11][10  9  8  7][ 6  5  4  3]]
[[11 12 13 14][ 7  8  9 10][ 3  4  5  6]]
[[14 10  6][13  9  5][12  8  4][11  7  3]]
[[9 9 9 9][9 9 8 7][6 5 5 5]]
[10.  9.  8.  7.]

numpy索引

import numpy as np

a = np.arange(3, 15).reshape((3, 4))
print(a)
print(a[2, 3])  # single element: row index 2, column index 3
print(a[2])  # the whole row at index 2
print(a[2, :])  # the same row, with an explicit "all columns" slice

[[ 3  4  5  6][ 7  8  9 10][11 12 13 14]]
14
[11 12 13 14]
[11 12 13 14]

import numpy as np

a = np.arange(3, 15).reshape((3, 4))
print(a)
# Iterating over a 2-D array yields one row per step.
for row in a:
    print(row)

[[ 3  4  5  6][ 7  8  9 10][11 12 13 14]]
[3 4 5 6]
[ 7  8  9 10]
[11 12 13 14]

import numpy as np

a = np.arange(3, 15).reshape((3, 4))
print(a)
# Iterating over the transpose yields the columns of `a`, one per step.
for row in a.T:
    print(row)

[[ 3  4  5  6][ 7  8  9 10][11 12 13 14]]
[ 3  7 11]
[ 4  8 12]
[ 5  9 13]
[ 6 10 14]

import numpy as np

a = np.arange(3, 15).reshape((3, 4))
print(a)
# `a.flat` is a 1-D iterator over every element of the array.
for item in a.flat:
    print(item)

[[ 3  4  5  6][ 7  8  9 10][11 12 13 14]]
3
4
5
6
7
8
9
10
11
12
13
14

numpy 的array合并

import numpy as np

a = np.array([1, 1, 1])
b = np.array([2, 2, 2])
# Join a and b end to end (axis 0 is the default).
# Alternatives: np.vstack((a, b)) stacks them top/bottom,
#               np.hstack((a, b)) joins them left/right.
c = np.concatenate([a, b])
print(c, c.shape)

[1 1 1 2 2 2] (6,)

array的分割

import numpy as np

a = np.arange(12).reshape((3, 4))
print(a)
# np.split needs the axis length to divide evenly by the section count.
by_columns = np.split(a, 2, axis=1)  # two blocks of two columns each
print(by_columns)
by_rows = np.split(a, 3, axis=0)  # three blocks of one row each
print(by_rows)

[[ 0  1  2  3][ 4  5  6  7][ 8  9 10 11]]
[array([[0, 1],[4, 5],[8, 9]]), array([[ 2,  3],[ 6,  7],[10, 11]])]
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8,  9, 10, 11]])]

11_pandas_intro

import pandas as pd
import numpy as np

# A Series is a labelled 1-D column; np.nan marks a missing value.
s = pd.Series([1, 3, 4, np.nan, 4, 1])
print(s)

# Six consecutive dates used as the row labels.
dates = pd.date_range("20160101", periods=6)
# 6x4 table of random numbers: dates as the index, A-D as the column names.
df = pd.DataFrame(
    np.random.randn(6, 4),
    index=dates,
    columns=['A', 'B', 'C', 'D'],
)
df  # bare expression: a notebook displays it, a plain script ignores it

0    1.0
1    3.0
2    4.0
3    NaN
4    4.0
5    1.0
dtype: float64

	A	B	C	D
2016-01-01	0.971028	-0.693613	-0.002452	-0.385629
2016-01-02	0.632282	1.048019	0.509823	-0.910899
2016-01-03	-0.802543	-0.723904	-0.772236	-0.079639
2016-01-04	-1.048101	0.960971	0.674664	0.224410
2016-01-05	-0.580963	-0.806524	-0.504699	0.690608
2016-01-06	0.516184	-1.038919	0.920207	0.302334

# Select column B by label; the result is a Series that keeps the date index.
print(df.loc[:, "B"])

2016-01-01   -0.693613
2016-01-02    1.048019
2016-01-03   -0.723904
2016-01-04    0.960971
2016-01-05   -0.806524
2016-01-06   -1.038919
Freq: D, Name: B, dtype: float64

# Build a frame from a dict: scalars are repeated for every row, and each
# column keeps its own dtype.
df2 = pd.DataFrame({
    'A': 1.,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(["test", "train", "test", "train"]),
    'F': 'foo',
})
print(df2)  # a default integer index is generated
print(df2["A"].dtypes)  # dtype of column A

     A          B    C  D      E    F
0  1.0 2013-01-02  1.0  3   test  foo
1  1.0 2013-01-02  1.0  3  train  foo
2  1.0 2013-01-02  1.0  3   test  foo
3  1.0 2013-01-02  1.0  3  train  foo
float64

# Inspect the frames built above, in order:
#   df.index       - row labels
#   df.columns     - column labels
#   df2.values     - the underlying values as an array
#   df2.describe() - summary statistics (count, mean, std, ...) of the numeric columns
for view in (df.index, df.columns, df2.values, df2.describe()):
    print(view)

DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04','2016-01-05', '2016-01-06'],dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')
[[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo'][1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo'][1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo'][1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]
         A    C    D
count  4.0  4.0  4.0
mean   1.0  1.0  3.0
std    0.0  0.0  0.0
min    1.0  1.0  3.0
25%    1.0  1.0  3.0
50%    1.0  1.0  3.0
75%    1.0  1.0  3.0
max    1.0  1.0  3.0

# Transpose: rows become columns and columns become rows.
transposed = df.T
print(transposed)

   2016-01-01  2016-01-02  2016-01-03  2016-01-04  2016-01-05  2016-01-06
A    0.971028    0.632282   -0.802543   -1.048101   -0.580963    0.516184
B   -0.693613    1.048019   -0.723904    0.960971   -0.806524   -1.038919
C   -0.002452    0.509823   -0.772236    0.674664   -0.504699    0.920207
D   -0.385629   -0.910899   -0.079639    0.224410    0.690608    0.302334

# Sort by the column labels (axis 1) in descending order: D, C, B, A.
print(df.sort_index(axis='columns', ascending=False))

                   D         C         B         A
2016-01-01 -0.385629 -0.002452 -0.693613  0.971028
2016-01-02 -0.910899  0.509823  1.048019  0.632282
2016-01-03 -0.079639 -0.772236 -0.723904 -0.802543
2016-01-04  0.224410  0.674664  0.960971 -1.048101
2016-01-05  0.690608 -0.504699 -0.806524 -0.580963
2016-01-06  0.302334  0.920207 -1.038919  0.516184

# Sort the rows by the values in column B, ascending.
print(df.sort_values('B'))

                   A         B         C         D
2016-01-06  0.516184 -1.038919  0.920207  0.302334
2016-01-05 -0.580963 -0.806524 -0.504699  0.690608
2016-01-03 -0.802543 -0.723904 -0.772236 -0.079639
2016-01-01  0.971028 -0.693613 -0.002452 -0.385629
2016-01-04 -1.048101  0.960971  0.674664  0.224410
2016-01-02  0.632282  1.048019  0.509823 -0.910899

pandas的选择数据

import pandas as pd
import numpy as np

dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df['A'])  # select column A by label
print(df.A)  # the same column through attribute access

2013-01-01    0.694411
2013-01-02    0.382285
2013-01-03   -1.161164
2013-01-04   -0.111133
2013-01-05    0.346966
2013-01-06   -0.794061
Freq: D, Name: A, dtype: float64
2013-01-01    0.694411
2013-01-02    0.382285
2013-01-03   -1.161164
2013-01-04   -0.111133
2013-01-05    0.346966
2013-01-06   -0.794061
Freq: D, Name: A, dtype: float64

# Row slices: by position (end excluded) and by date label (end included).
print(df.iloc[0:3])
print(df.loc['20130102':'20130104'])

                   A         B         C         D
2013-01-01  0.694411  0.742594 -0.890543  0.063027
2013-01-02  0.382285 -1.662041 -0.097722 -0.713275
2013-01-03 -1.161164  1.290674  2.099184 -0.190667
                   A         B         C         D
2013-01-02  0.382285 -1.662041 -0.097722 -0.713275
2013-01-03 -1.161164  1.290674  2.099184 -0.190667
2013-01-04 -0.111133  0.273296 -0.690603 -0.151940

# select by label: loc
day = '20130102'
wanted = ['A', 'B']
print(df.loc[day])  # one row, all columns
print(df.loc[:, wanted])  # all rows, columns A and B
print(df.loc[day, wanted])  # one row, columns A and B

A    0.382285
B   -1.662041
C   -0.097722
D   -0.713275
Name: 2013-01-02 00:00:00, dtype: float64
                   A         B
2013-01-01  0.694411  0.742594
2013-01-02  0.382285 -1.662041
2013-01-03 -1.161164  1.290674
2013-01-04 -0.111133  0.273296
2013-01-05  0.346966  0.011969
2013-01-06 -0.794061 -1.731132
A    0.382285
B   -1.662041
Name: 2013-01-02 00:00:00, dtype: float64

# select by position: iloc
fourth_row = df.iloc[3]  # row at position 3
print(fourth_row)
print(df.iloc[3, 1])  # single value: row 3, column 1
print(df.iloc[3:5, 0:2])  # rows 3-4, columns 0-1 (slice ends are excluded)
print(df.iloc[[1, 2, 4], [0, 2]])  # explicit lists of row and column positions

A   -0.111133
B    0.273296
C   -0.690603
D   -0.151940
Name: 2013-01-04 00:00:00, dtype: float64
0.27329633287109306
                   A         B
2013-01-04 -0.111133  0.273296
2013-01-05  0.346966  0.011969
                   A         C
2013-01-02  0.382285 -0.097722
2013-01-03 -1.161164  2.099184
2013-01-05  0.346966 -1.649647

# mixed selection: rows by position, columns by label.
# `.ix` is deprecated (see the warning printed below) and no longer exists in
# current pandas, so the first three row labels are looked up by position and
# then handed to the label-based `.loc`.
print(df.loc[df.index[:3], ['A', 'C']])

                   A         C
2013-01-01  0.694411 -0.890543
2013-01-02  0.382285 -0.097722
2013-01-03 -1.161164  2.099184

G:\Anaconda\lib\site-packages\ipykernel_launcher.py:2: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated

# Boolean filtering: keep only the rows whose value in column A is positive.
print(df.loc[df['A'] > 0])

                   A         B         C         D
2013-01-01  0.694411  0.742594 -0.890543  0.063027
2013-01-02  0.382285 -1.662041 -0.097722 -0.713275
2013-01-05  0.346966  0.011969 -1.649647 -0.890891

# change values
import pandas as pd
import numpy as np

dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])

df.iloc[2, 2] = 1111  # assign by position
df.loc['2013-01-03', 'D'] = 2222  # assign by label
# Set every positive value in column A to 0. A single .loc call writes into df
# itself; the chained form `df.A[df.A > 0] = 0` may assign to a temporary copy.
df.loc[df.A > 0, 'A'] = 0
df['F'] = np.nan  # new column filled with NaN
# New column from a Series; values are aligned on the matching date index.
df['G'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20130101', periods=6))
print(df)

                   A         B            C            D   F  G
2013-01-01 -0.106455 -1.578351     1.250689    -2.270213 NaN  1
2013-01-02 -1.043037 -1.222880     0.335619    -0.771527 NaN  2
2013-01-03 -1.803602  0.778370  1111.000000  2222.000000 NaN  3
2013-01-04  0.000000  1.026862     2.841678    -0.614898 NaN  4
2013-01-05  0.000000  0.250409    -1.468902    -1.472633 NaN  5
2013-01-06  0.000000 -0.399359    -0.064457     1.472004 NaN  6

处理丢失的值

import pandas as pd
import numpy as np

dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
# NaN is a float. Cast the two target columns first so the assignments below
# do not depend on pandas silently upcasting an integer column.
df = df.astype({'B': 'float64', 'C': 'float64'})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df.dropna(axis=0, how='any'))  # drop rows with any NaN; how={'any', 'all'}
print(df.fillna(value=0))  # replace NaN with 0
print(pd.isnull(df))  # True where a value is missing

             A     B     C   D
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
             A     B     C   D
2013-01-01   0   0.0   2.0   3
2013-01-02   4   5.0   0.0   7
2013-01-03   8   9.0  10.0  11
2013-01-04  12  13.0  14.0  15
2013-01-05  16  17.0  18.0  19
2013-01-06  20  21.0  22.0  23
                A      B      C      D
2013-01-01  False   True  False  False
2013-01-02  False  False   True  False
2013-01-03  False  False  False  False
2013-01-04  False  False  False  False
2013-01-05  False  False  False  False
2013-01-06  False  False  False  False

# read data
import pandas as pd

# read from a CSV file
data = pd.read_csv('student.csv')
print(data)

# save to a pickle file
data.to_pickle('student.pickle')

合并concat merge

import pandas as pd
import numpy as np

# concatenating
# ignore index
# Three 3x4 frames with the same columns, filled with 0, 1 and 2.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['a', 'b', 'c', 'd'])
for frame in (df1, df2, df3):
    print(frame)
# Stack the rows (axis 0 is the default); ignore_index=True discards the
# original labels and renumbers the result 0..n-1.
res = pd.concat([df1, df2, df3], ignore_index=True)
print(res)

     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
     a    b    c    d
0  1.0  1.0  1.0  1.0
1  1.0  1.0  1.0  1.0
2  1.0  1.0  1.0  1.0
     a    b    c    d
0  2.0  2.0  2.0  2.0
1  2.0  2.0  2.0  2.0
2  2.0  2.0  2.0  2.0
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
5  1.0  1.0  1.0  1.0
6  2.0  2.0  2.0  2.0
7  2.0  2.0  2.0  2.0
8  2.0  2.0  2.0  2.0

# join, ('inner', 'outer')
df1 = pd.DataFrame(
    np.ones((3, 4)) * 0,
    index=[1, 2, 3],
    columns=['a', 'b', 'c', 'd'],
)
df2 = pd.DataFrame(
    np.ones((3, 4)) * 1,
    index=[2, 3, 4],
    columns=['b', 'c', 'd', 'e'],
)
for frame in (df1, df2):
    print(frame)
# join='outer' would keep every row label and fill the gaps with NaN;
# join='inner' keeps only the row labels present in both frames.
res = pd.concat([df1, df2], join='inner', axis=1)
print(res)

     a    b    c    d
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  0.0  0.0  0.0  0.0
     b    c    d    e
2  1.0  1.0  1.0  1.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
     a    b    c    d    b    c    d    e
2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0

# Keep exactly df1's row labels in the side-by-side result.
# The `join_axes` argument no longer exists in current pandas; aligning df2 to
# df1's index before concatenating gives the same rows, with NaN where df2 has
# no matching label.
res = pd.concat([df1, df2.reindex(df1.index)], axis=1)
print(res)

     a    b    c    d    b    c    d    e
1  0.0  0.0  0.0  0.0  NaN  NaN  NaN  NaN
2  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0
3  0.0  0.0  0.0  0.0  1.0  1.0  1.0  1.0

df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['b', 'c', 'd', 'e'], index=[2, 3, 4])
print(df1)
print(df2)
# Append df2's rows below df1. `DataFrame.append` no longer exists in current
# pandas; `pd.concat` is the replacement. Columns missing from one frame are
# filled with NaN, and ignore_index=True renumbers the rows 0..n-1.
res = pd.concat([df1, df2], ignore_index=True)
print(res)

     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
     b    c    d    e
2  1.0  1.0  1.0  1.0
3  1.0  1.0  1.0  1.0
4  1.0  1.0  1.0  1.0
     a    b    c    d    e
0  0.0  0.0  0.0  0.0  NaN
1  0.0  0.0  0.0  0.0  NaN
2  0.0  0.0  0.0  0.0  NaN
3  NaN  1.0  1.0  1.0  1.0
4  NaN  1.0  1.0  1.0  1.0
5  NaN  1.0  1.0  1.0  1.0

G:\Anaconda\lib\site-packages\pandas\core\frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  sort=sort)

# Append several frames at once. `DataFrame.append` no longer exists in
# current pandas; pass all frames to `pd.concat` instead.
res = pd.concat([df1, df2, df3], ignore_index=True)
print(res)

     a    b    c    d    e
0  0.0  0.0  0.0  0.0  NaN
1  0.0  0.0  0.0  0.0  NaN
2  0.0  0.0  0.0  0.0  NaN
3  NaN  1.0  1.0  1.0  1.0
4  NaN  1.0  1.0  1.0  1.0
5  NaN  1.0  1.0  1.0  1.0
6  2.0  2.0  2.0  2.0  NaN
7  2.0  2.0  2.0  2.0  NaN
8  2.0  2.0  2.0  2.0  NaN

G:\Anaconda\lib\site-packages\pandas\core\frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  sort=sort)

s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
print(s1)
# Append the Series as one extra row. `DataFrame.append` no longer exists in
# current pandas: turn the Series into a one-row frame (its labels become the
# column names) and concatenate it below df1.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
print(res)

a    1
b    2
c    3
d    4
dtype: int64
     a    b    c    d
0  0.0  0.0  0.0  0.0
1  0.0  0.0  0.0  0.0
2  0.0  0.0  0.0  0.0
3  1.0  2.0  3.0  4.0

import pandas as pd

# merging two df by key/keys. (may be used in database)
# simple example
left = pd.DataFrame({'key1': ['K0', 'K1', 'K2', 'K3'],
                     'key2': ['K0', 'K1', 'K1', 'K1'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key1': ['K0', 'K1', 'K2', 'K3'],
                      'key2': ['K0', 'K1', 'K2', 'K3'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
print(left)
print(right)
# Neither frame has a column called 'key' (on='key' raises KeyError), so merge
# on the two key columns they actually share. The default how='inner' keeps
# only the rows whose (key1, key2) pair appears in both frames.
res = pd.merge(left, right, on=['key1', 'key2'])
print(res)

  key1 key2   A   B
0   K0   K0  A0  B0
1   K1   K1  A1  B1
2   K2   K1  A2  B2
3   K3   K1  A3  B3
  key1 key2   C   D
0   K0   K0  C0  D0
1   K1   K1  C1  D1
2   K2   K2  C2  D2
3   K3   K3  C3  D3
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-68-254e3e1c6b3a> in <module>()
     13 print(left)
     14 print(right)
---> 15 res = pd.merge(left, right, on='key')#基于key合并
     16 print(res)

G:\Anaconda\lib\site-packages\pandas\core\reshape\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     59                          right_index=right_index, sort=sort, suffixes=suffixes,
     60                          copy=copy, indicator=indicator,
---> 61                          validate=validate)
     62     return op.get_result()
     63 

G:\Anaconda\lib\site-packages\pandas\core\reshape\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
    549         (self.left_join_keys,
    550          self.right_join_keys,
--> 551          self.join_names) = self._get_merge_keys()
    552 
    553         # validate the merge keys dtypes. We may need to coerce

G:\Anaconda\lib\site-packages\pandas\core\reshape\merge.py in _get_merge_keys(self)
    855                             right_keys.append(
    856                                 right._get_label_or_level_values(
--> 857                                     rk, stacklevel=stacklevel))
    858                         else:
    859                             # work-around for merge_asof(right_index=True)

G:\Anaconda\lib\site-packages\pandas\core\generic.py in _get_label_or_level_values(self, key, axis, stacklevel)
   1380             values = self.axes[axis].get_level_values(key)._values
   1381         else:
-> 1382             raise KeyError(key)
   1383 
   1384         # Check for duplicates

KeyError: 'key'

# how = ['left', 'right', 'outer', 'inner']
# 'left' keeps every row of `left`; columns coming from `right` are NaN where
# no matching (key1, key2) pair exists.
res = left.merge(right, on=['key1', 'key2'], how='left')
print(res)

# indicator
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
print(df1)
print(df2)
# indicator=True adds a column recording which frame each row came from.
res = df1.merge(df2, on='col1', how='outer', indicator=True)

  key1 key2   A   B    C    D
0   K0   K0  A0  B0   C0   D0
1   K1   K1  A1  B1   C1   D1
2   K2   K1  A2  B2  NaN  NaN
3   K3   K1  A3  B3  NaN  NaN
   col1 col_left
0     0        a
1     1        b
   col1  col_right
0     1          2
1     2          2
2     2          2

# Passing a string as `indicator` uses it as the name of the indicator column.
res = df1.merge(df2, on='col1', how='outer', indicator='indicator_column')

# merged by index
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
print(left)
print(right)
# left_index and right_index: match rows on the index labels instead of a column
res = left.merge(right, left_index=True, right_index=True, how='outer')
res = left.merge(right, left_index=True, right_index=True, how='inner')

# handle overlapping
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
# Both frames have an 'age' column; the suffixes tell the two apart in the result.
res = boys.merge(girls, on='k', suffixes=['_boy', '_girl'], how='inner')
print(res)
# join function in pandas is similar with merge. If know merge, you will understand join

Numpy and Pandas 用法整理相关推荐

pandas 作图统计_Pandas数据可视化工具——Seaborn用法整理（下）
本科数学,编程几乎零基础(之前只学过matlab)今年年初开始学习Python数据挖掘,找到了一个很好的平台--BigQuant,省去了安装Python和安装各种库的烦恼.我最近在开始了解机器学习,B ...
python数据清洗工具、方法、过程整理归纳（一、数据清洗之常用工具——numpy，pandas）
文章目录 1 背景 2 常用工具 2.1 numpy 2.2 pandas 1 背景数据清洗是整个数据分析过程的第一步,也是整个数据分析项目中最耗费时间的一步: 数据分析的过程决定了数据分析的准确性 ...
python数据可视化工具 pandas_Pandas数据可视化工具——Seaborn用法整理（下）
在前一篇文章 Pandas数据可视化工具--Seaborn用法整理(上),我们了解了如何使用这些Seaborn代码绘制分布图和分类图.在本文中,我们将继续讨论Seaborn提供的一些其他以绘制不同类型 ...
数据基础---numpy、pandas使用教程
数组对象 Numpy库 Numpy最重要的一个特点是就是其N维数组对象,即ndarray,ndarray是一个通用的同构数据多维容器,其中的所有元素必须是相同类型的.每个数组都有一个shape(一个表 ...
python panda用法_Python3 pandas用法大全
Python3 pandas用法大全一.生成数据表 1.首先导入pandas库,一般都会用到numpy库,所以我们先导入备用: importnumpy as npimport pandas as p ...
Python pandas用法
Python pandas用法无味之味关注 12019.01.10 15:43:25字数 2,877阅读 91,914 介绍在Python中,pandas是基于NumPy数组构建的,使数据预处理. ...
Python 学习 —— Numpy 、Pandas 傻傻分不清楚
之前的文章里面谈到过,我从R转到Python上,一个很大的不习惯就是R的数据结构比较简单,但是Python的数据类型比较多,很容易就令人头脑混乱.但是今天学习了一下Udacity的课程,顿时就清楚多了 ...
【Python基础】101道Numpy、Pandas练习题，提升你的Python水平
无论是数据分析还是机器学习,数据的预处理必不可少.其中最常用.最基础的Python库非numpy和pandas莫属,很多初学者可能看了很多教程,但是很快就把用法忘光了. 光看不练假把式,今天向大家推荐 ...
【机器学习基础】前置知识（四）：一文掌握Pandas用法
Pandas提供快速,灵活和富于表现力的数据结构,是强大的数据分析Python库. 本文收录于机器学习前置教程系列. 一.Series和DataFrame Pandas建立在NumPy之上,更多Num ...

Numpy and Pandas 用法整理

Numpy and Pandas 用法整理相关推荐

最新文章

热门文章