Numpy and Pandas 用法整理
numpy属性
import numpy as np
array=np.array([[1,2,3],[2,3,4]])
#array数组 matrix 矩阵
print(array)
print('number of dim:',array.ndim)#看几维的数组
print("shape:",array.shape)#看行数列数
print("size:",array.size)#看有几个元素
[[1 2 3][2 3 4]]
number of dim: 2
shape: (2, 3)
size: 6
numpy创建array
import numpy as np
a=np.array([2,23,4],dtype=np.float)#创建数组并且定义为浮点型(注:np.float 别名已在 NumPy 1.24 中移除,新版本请改用 float 或 np.float64)
print(a.dtype)#打印种类
a=np.array([[2,23,4],[2,23,4]])
print(a)
a=np.zeros((3,4))#3x4的全0矩阵
a=np.arange(10,20,2)#生成有序数列
np.arange(12).reshape((3,4))#reshape改变形状
print(a)
a=np.linspace(1,10,5)#1到10等距四段
print(a)
float64
[[ 2 23 4][ 2 23 4]]
[10 12 14 16 18]
[ 1. 3.25 5.5 7.75 10. ]
numpy的基础运算
import numpy as np
a=np.array([10,20,30,40])
b=np.arange(4)
print(a,b)
c=a-b
c=b**2#平方
c=10*np.tan(a)#tan函数
print(c)
[10 20 30 40] [0 1 2 3]
[ 6.48360827 22.37160944 -64.05331197 -11.17214931]
import numpy as np
a=np.array([10,20,30,40])
b=np.arange(4)
print(b<3)
print(b==3)#判断哪些等于3
[ True True True False]
[False False False True]
#逐个相乘和矩阵相乘
import numpy as np
a=np.array([[1,1],[0,1]])
b=np.arange(4).reshape(2,2)
c=a*b
print(c)
c_dot=np.dot(a,b)#矩阵相乘
print(c_dot)
[[0 1][0 3]]
[[2 4][2 3]]
#求和最小最大
import numpy as np
a=np.random.random((2,4))
print(a)
#a=(np.sum(a,axis=1))
#a=(np.max(a,axis=1))
a=(np.min(a,axis=1))
print(a)
[[0.35129081 0.6504631 0.2607104 0.49290976][0.35479875 0.33376722 0.4280792 0.85249914]]
[0.2607104 0.33376722]
numpy 基础运算2
import numpy as np
a= np.arange(2,14).reshape((3,4))
print(a)
print(np.argmin(a))#打印最小值的索引
print(np.average(a))#打印平均值
print(np.mean(a))#打印平均值
print(np.median(a))#中位数
print(np.diff(a))#一阶差分
[[ 2 3 4 5][ 6 7 8 9][10 11 12 13]]
0
7.5
7.5
7.5
[[1 1 1][1 1 1][1 1 1]]
import numpy as np
a=np.arange(14,2,-1).reshape((3,4))
print(a)
print(np.sort(a))#每行排序
print(a.T)#矩阵转置
print(np.clip(a,5,9))#大于9为9,小于5为5
print(np.mean(a,axis=0))#列平均
[[14 13 12 11][10 9 8 7][ 6 5 4 3]]
[[11 12 13 14][ 7 8 9 10][ 3 4 5 6]]
[[14 10 6][13 9 5][12 8 4][11 7 3]]
[[9 9 9 9][9 9 8 7][6 5 5 5]]
[10. 9. 8. 7.]
numpy索引
import numpy as np
a=np.arange(3,15).reshape((3,4))
print(a)
print(a[2][3])#看3行4列的值
print(a[2])
print(a[2,:])#打印第3行(索引为2)所有数
[[ 3 4 5 6][ 7 8 9 10][11 12 13 14]]
14
[11 12 13 14]
[11 12 13 14]
import numpy as np
a=np.arange(3,15).reshape((3,4))
print(a)
for row in a:#行迭代
    print(row)
[[ 3 4 5 6][ 7 8 9 10][11 12 13 14]]
[3 4 5 6]
[ 7 8 9 10]
[11 12 13 14]
import numpy as np
a=np.arange(3,15).reshape((3,4))
print(a)
for row in a.T:#列迭代
    print(row)
[[ 3 4 5 6][ 7 8 9 10][11 12 13 14]]
[ 3 7 11]
[ 4 8 12]
[ 5 9 13]
[ 6 10 14]
import numpy as np
a=np.arange(3,15).reshape((3,4))
print(a)
for row in a.flat:#a.flat变成一维
    print(row)
[[ 3 4 5 6][ 7 8 9 10][11 12 13 14]]
3
4
5
6
7
8
9
10
11
12
13
14
numpy 的array合并
import numpy as np
a=np.array([1,1,1])
b=np.array([2,2,2])
c=np.concatenate((a,b),axis=0)#合并ab
#c=np.vstack((a,b))#上下合并a,b
#c=np.hstack((a,b))#左右合并a,b
print(c,c.shape)
[1 1 1 2 2 2] (6,)
array的分割
import numpy as np
a=np.arange(12).reshape((3,4))
print(a)
print(np.split(a,2,axis=1))#按列进行分两块
print(np.split(a,3,axis=0))#按行进行分三块
[[ 0 1 2 3][ 4 5 6 7][ 8 9 10 11]]
[array([[0, 1],[4, 5],[8, 9]]), array([[ 2, 3],[ 6, 7],[10, 11]])]
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8, 9, 10, 11]])]
11_pandas_intro
import pandas as pd
import numpy as np
s=pd.Series([1,3,4,np.nan,4,1])
print(s)
dates=pd.date_range("20160101",periods=6)#产生6个时间序列
df=pd.DataFrame(np.random.randn(6,4),index=dates,columns=['A','B','C','D'])#dates做序列,ABCD做列名
df
0 1.0
1 3.0
2 4.0
3 NaN
4 4.0
5 1.0
dtype: float64
 | A | B | C | D |
---|---|---|---|---|
2016-01-01 | 0.971028 | -0.693613 | -0.002452 | -0.385629 |
2016-01-02 | 0.632282 | 1.048019 | 0.509823 | -0.910899 |
2016-01-03 | -0.802543 | -0.723904 | -0.772236 | -0.079639 |
2016-01-04 | -1.048101 | 0.960971 | 0.674664 | 0.224410 |
2016-01-05 | -0.580963 | -0.806524 | -0.504699 | 0.690608 |
2016-01-06 | 0.516184 | -1.038919 | 0.920207 | 0.302334 |
print(df["B"])
2016-01-01 -0.693613
2016-01-02 1.048019
2016-01-03 -0.723904
2016-01-04 0.960971
2016-01-05 -0.806524
2016-01-06 -1.038919
Freq: D, Name: B, dtype: float64
df2 = pd.DataFrame({'A' : 1.,'B' : pd.Timestamp('20130102'),'C' : pd.Series(1,index=list(range(4)),dtype='float32'),'D' : np.array([3] * 4,dtype='int32'),'E' : pd.Categorical(["test","train","test","train"]),'F' : 'foo'})
print(df2)#自动产生序号
print(df2["A"].dtypes)#打印A这一列的型
A B C D E F
0 1.0 2013-01-02 1.0 3 test foo
1 1.0 2013-01-02 1.0 3 train foo
2 1.0 2013-01-02 1.0 3 test foo
3 1.0 2013-01-02 1.0 3 train foo
float64
print(df.index)
print(df.columns)
print(df2.values)#打印值
print(df2.describe())#看dataframe的各种数值型的特征,平均,标准差(std)等
DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04','2016-01-05', '2016-01-06'],dtype='datetime64[ns]', freq='D')
Index(['A', 'B', 'C', 'D'], dtype='object')
[[1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'test' 'foo']
 [1.0 Timestamp('2013-01-02 00:00:00') 1.0 3 'train' 'foo']]
A C D
count 4.0 4.0 4.0
mean 1.0 1.0 3.0
std 0.0 0.0 0.0
min 1.0 1.0 3.0
25% 1.0 1.0 3.0
50% 1.0 1.0 3.0
75% 1.0 1.0 3.0
max 1.0 1.0 3.0
print(df.T)#转置
2016-01-01 2016-01-02 2016-01-03 2016-01-04 2016-01-05 2016-01-06
A 0.971028 0.632282 -0.802543 -1.048101 -0.580963 0.516184
B -0.693613 1.048019 -0.723904 0.960971 -0.806524 -1.038919
C -0.002452 0.509823 -0.772236 0.674664 -0.504699 0.920207
D -0.385629 -0.910899 -0.079639 0.224410 0.690608 0.302334
print(df.sort_index(axis=1,ascending=False))#对列倒序排序
D C B A
2016-01-01 -0.385629 -0.002452 -0.693613 0.971028
2016-01-02 -0.910899 0.509823 1.048019 0.632282
2016-01-03 -0.079639 -0.772236 -0.723904 -0.802543
2016-01-04 0.224410 0.674664 0.960971 -1.048101
2016-01-05 0.690608 -0.504699 -0.806524 -0.580963
2016-01-06 0.302334 0.920207 -1.038919 0.516184
print(df.sort_values(by='B'))#对df通过B进行正向排序
A B C D
2016-01-06 0.516184 -1.038919 0.920207 0.302334
2016-01-05 -0.580963 -0.806524 -0.504699 0.690608
2016-01-03 -0.802543 -0.723904 -0.772236 -0.079639
2016-01-01 0.971028 -0.693613 -0.002452 -0.385629
2016-01-04 -1.048101 0.960971 0.674664 0.224410
2016-01-02 0.632282 1.048019 0.509823 -0.910899
pandas的选择数据
import pandas as pd
import numpy as np
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A', 'B', 'C', 'D'])
print(df['A'])#选择A这一列
print(df.A)
2013-01-01 0.694411
2013-01-02 0.382285
2013-01-03 -1.161164
2013-01-04 -0.111133
2013-01-05 0.346966
2013-01-06 -0.794061
Freq: D, Name: A, dtype: float64
2013-01-01 0.694411
2013-01-02 0.382285
2013-01-03 -1.161164
2013-01-04 -0.111133
2013-01-05 0.346966
2013-01-06 -0.794061
Freq: D, Name: A, dtype: float64
print(df[0:3])
print(df['20130102':'20130104'])
A B C D
2013-01-01 0.694411 0.742594 -0.890543 0.063027
2013-01-02 0.382285 -1.662041 -0.097722 -0.713275
2013-01-03 -1.161164 1.290674 2.099184 -0.190667
A B C D
2013-01-02 0.382285 -1.662041 -0.097722 -0.713275
2013-01-03 -1.161164 1.290674 2.099184 -0.190667
2013-01-04 -0.111133 0.273296 -0.690603 -0.151940
# select by label: loc
print(df.loc['20130102'])
print(df.loc[:,['A','B']])
print(df.loc['20130102', ['A','B']])
A 0.382285
B -1.662041
C -0.097722
D -0.713275
Name: 2013-01-02 00:00:00, dtype: float64
A B
2013-01-01 0.694411 0.742594
2013-01-02 0.382285 -1.662041
2013-01-03 -1.161164 1.290674
2013-01-04 -0.111133 0.273296
2013-01-05 0.346966 0.011969
2013-01-06 -0.794061 -1.731132
A 0.382285
B -1.662041
Name: 2013-01-02 00:00:00, dtype: float64
# select by position: iloc
print(df.iloc[3])
print(df.iloc[3, 1])
print(df.iloc[3:5,0:2])
print(df.iloc[[1,2,4],[0,2]])
A -0.111133
B 0.273296
C -0.690603
D -0.151940
Name: 2013-01-04 00:00:00, dtype: float64
0.27329633287109306
A B
2013-01-04 -0.111133 0.273296
2013-01-05 0.346966 0.011969
A C
2013-01-02 0.382285 -0.097722
2013-01-03 -1.161164 2.099184
2013-01-05 0.346966 -1.649647
# mixed selection: ix (note: .ix is deprecated and was removed in pandas 1.0; use .loc / .iloc instead)
print(df.ix[:3,['A','C']])
A C
2013-01-01 0.694411 -0.890543
2013-01-02 0.382285 -0.097722
2013-01-03 -1.161164 2.099184
G:\Anaconda\lib\site-packages\ipykernel_launcher.py:2: DeprecationWarning:
.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing
See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
print(df[df.A>0])#条件筛选
A B C D
2013-01-01 0.694411 0.742594 -0.890543 0.063027
2013-01-02 0.382285 -1.662041 -0.097722 -0.713275
2013-01-05 0.346966 0.011969 -1.649647 -0.890891
#改变值
import pandas as pd
import numpy as np
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6,4), index=dates, columns=['A', 'B', 'C', 'D'])
df.iloc[2,2] = 1111
df.loc['2013-01-03', 'D'] = 2222
df.A[df.A>0] = 0
df['F'] = np.nan
df['G'] = pd.Series([1,2,3,4,5,6], index=pd.date_range('20130101', periods=6))
print(df)
A B C D F G
2013-01-01 -0.106455 -1.578351 1.250689 -2.270213 NaN 1
2013-01-02 -1.043037 -1.222880 0.335619 -0.771527 NaN 2
2013-01-03 -1.803602 0.778370 1111.000000 2222.000000 NaN 3
2013-01-04 0.000000 1.026862 2.841678 -0.614898 NaN 4
2013-01-05 0.000000 0.250409 -1.468902 -1.472633 NaN 5
2013-01-06 0.000000 -0.399359 -0.064457 1.472004 NaN 6
处理丢失的值
import pandas as pd
import numpy as np
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6,4)), index=dates, columns=['A', 'B', 'C', 'D'])
df.iloc[0,1] = np.nan
df.iloc[1,2] = np.nan
print(df.dropna(axis=0, how='any')) # how={'any', 'all'}
print(df.fillna(value=0))
print(pd.isnull(df))#看哪些是缺失值
A B C D
2013-01-03 8 9.0 10.0 11
2013-01-04 12 13.0 14.0 15
2013-01-05 16 17.0 18.0 19
2013-01-06 20 21.0 22.0 23
A B C D
2013-01-01 0 0.0 2.0 3
2013-01-02 4 5.0 0.0 7
2013-01-03 8 9.0 10.0 11
2013-01-04 12 13.0 14.0 15
2013-01-05 16 17.0 18.0 19
2013-01-06 20 21.0 22.0 23
A B C D
2013-01-01 False True False False
2013-01-02 False False True False
2013-01-03 False False False False
2013-01-04 False False False False
2013-01-05 False False False False
2013-01-06 False False False False
#读入数据
import pandas as pd
# read from
data = pd.read_csv('student.csv')
print(data)
# save to
data.to_pickle('student.pickle')
合并concat merge
import pandas as pd
import numpy as np
# concatenating
# ignore index
df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])
df3 = pd.DataFrame(np.ones((3,4))*2, columns=['a','b','c','d'])
print(df1)
print(df2)
print(df3)
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)#忽略之前的index,axis=0行合并
print(res)
a b c d
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
a b c d
0 1.0 1.0 1.0 1.0
1 1.0 1.0 1.0 1.0
2 1.0 1.0 1.0 1.0
a b c d
0 2.0 2.0 2.0 2.0
1 2.0 2.0 2.0 2.0
2 2.0 2.0 2.0 2.0
a b c d
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
5 1.0 1.0 1.0 1.0
6 2.0 2.0 2.0 2.0
7 2.0 2.0 2.0 2.0
8 2.0 2.0 2.0 2.0
# join, ('inner', 'outer')
df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'], index=[1,2,3])
df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])
print(df1)
print(df2)
#res = pd.concat([df1, df2], axis=1, join='outer')#扩展合并,没有的项补nan
res = pd.concat([df1, df2], axis=1, join='inner')#删减合并,合并共同项
print(res)
a b c d
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0
b c d e
2 1.0 1.0 1.0 1.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
a b c d b c d e
2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
res = pd.concat([df1, df2], axis=1, join_axes=[df1.index])#join_axes考虑第一个的索引(注:join_axes 参数已在 pandas 1.0 中移除,新版本可改用 pd.concat([df1, df2], axis=1).reindex(df1.index))
print(res)
a b c d b c d e
1 0.0 0.0 0.0 0.0 NaN NaN NaN NaN
2 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
3 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0
df1 = pd.DataFrame(np.ones((3,4))*0, columns=['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1, columns=['a','b','c','d'])
df2 = pd.DataFrame(np.ones((3,4))*1, columns=['b','c','d', 'e'], index=[2,3,4])
print(df1)
print(df2)
res = df1.append(df2, ignore_index=True)
print(res)
a b c d
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
b c d e
2 1.0 1.0 1.0 1.0
3 1.0 1.0 1.0 1.0
4 1.0 1.0 1.0 1.0
a b c d e
0 0.0 0.0 0.0 0.0 NaN
1 0.0 0.0 0.0 0.0 NaN
2 0.0 0.0 0.0 0.0 NaN
3 NaN 1.0 1.0 1.0 1.0
4 NaN 1.0 1.0 1.0 1.0
5 NaN 1.0 1.0 1.0 1.0
G:\Anaconda\lib\site-packages\pandas\core\frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
sort=sort)
res = df1.append([df2, df3],ignore_index=True)
print(res)
a b c d e
0 0.0 0.0 0.0 0.0 NaN
1 0.0 0.0 0.0 0.0 NaN
2 0.0 0.0 0.0 0.0 NaN
3 NaN 1.0 1.0 1.0 1.0
4 NaN 1.0 1.0 1.0 1.0
5 NaN 1.0 1.0 1.0 1.0
6 2.0 2.0 2.0 2.0 NaN
7 2.0 2.0 2.0 2.0 NaN
8 2.0 2.0 2.0 2.0 NaN
G:\Anaconda\lib\site-packages\pandas\core\frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.
To accept the future behavior, pass 'sort=False'.
To retain the current behavior and silence the warning, pass 'sort=True'.
sort=sort)
s1 = pd.Series([1,2,3,4], index=['a','b','c','d'])
print(s1)
res = df1.append(s1, ignore_index=True)
print(res)
a 1
b 2
c 3
d 4
dtype: int64
a b c d
0 0.0 0.0 0.0 0.0
1 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0
3 1.0 2.0 3.0 4.0
import pandas as pd
# merging two df by key/keys. (may be used in database)
# simple example
left = pd.DataFrame({'key1': ['K0', 'K1', 'K2', 'K3'],'key2':['K0', 'K1', 'K1', 'K1'],'A': ['A0', 'A1', 'A2', 'A3'],'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key1': ['K0', 'K1', 'K2', 'K3'],'key2':['K0', 'K1', 'K2', 'K3'],'C': ['C0', 'C1', 'C2', 'C3'],'D': ['D0', 'D1', 'D2', 'D3']})
print(left)
print(right)
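res = pd.merge(left, right, on='key')#基于key合并(注:left/right 中只有 key1、key2 列,没有名为 'key' 的列,所以这一行会抛出下面的 KeyError;应改为 on='key1' 或 on=['key1','key2'])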
print(res)
key1 key2 A B
0 K0 K0 A0 B0
1 K1 K1 A1 B1
2 K2 K1 A2 B2
3 K3 K1 A3 B3
key1 key2 C D
0 K0 K0 C0 D0
1 K1 K1 C1 D1
2 K2 K2 C2 D2
3 K3 K3 C3 D3
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-68-254e3e1c6b3a> in <module>()
13 print(left)
14 print(right)
---> 15 res = pd.merge(left, right, on='key')#基于key合并
16 print(res)
G:\Anaconda\lib\site-packages\pandas\core\reshape\merge.py in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
59 right_index=right_index, sort=sort, suffixes=suffixes,
60 copy=copy, indicator=indicator,
---> 61 validate=validate)
62 return op.get_result()
63
G:\Anaconda\lib\site-packages\pandas\core\reshape\merge.py in __init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
549 (self.left_join_keys,
550 self.right_join_keys,
--> 551 self.join_names) = self._get_merge_keys()
552
553 # validate the merge keys dtypes. We may need to coerce
G:\Anaconda\lib\site-packages\pandas\core\reshape\merge.py in _get_merge_keys(self)
855 right_keys.append(
856 right._get_label_or_level_values(
--> 857 rk, stacklevel=stacklevel))
858 else:
859 # work-around for merge_asof(right_index=True)
G:\Anaconda\lib\site-packages\pandas\core\generic.py in _get_label_or_level_values(self, key, axis, stacklevel)
1380 values = self.axes[axis].get_level_values(key)._values
1381 else:
-> 1382 raise KeyError(key)
1383
1384 # Check for duplicates
KeyError: 'key'
# how = ['left', 'right', 'outer', 'inner']
res = pd.merge(left, right, on=['key1','key2'], how='left')
print(res)
# indicator
df1 = pd.DataFrame({'col1':[0,1], 'col_left':['a','b']})
df2 = pd.DataFrame({'col1':[1,2,2],'col_right':[2,2,2]})
print(df1)
print(df2)
res = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
key1 key2 A B C D
0 K0 K0 A0 B0 C0 D0
1 K1 K1 A1 B1 C1 D1
2 K2 K1 A2 B2 NaN NaN
3 K3 K1 A3 B3 NaN NaN
col1 col_left
0 0 a
1 1 b
col1 col_right
0 1 2
1 2 2
2 2 2
res = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')
# merged by index
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],'B': ['B0', 'B1', 'B2']},index=['K0', 'K1', 'K2'])
right = pd.DataFrame({'C': ['C0', 'C2', 'C3'],'D': ['D0', 'D2', 'D3']},index=['K0', 'K2', 'K3'])
print(left)
print(right)
# left_index and right_index
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
res = pd.merge(left, right, left_index=True, right_index=True, how='inner')
# handle overlapping
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
res = pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], how='inner')
print(res)
# The join function in pandas is similar to merge. If you know merge, you will understand join.
Numpy and Pandas 用法整理相关推荐
- pandas 作图 统计_Pandas数据可视化工具——Seaborn用法整理(下)
本科数学,编程几乎零基础(之前只学过matlab)今年年初开始学习Python数据挖掘,找到了一个很好的平台--BigQuant,省去了安装Python和安装各种库的烦恼.我最近在开始了解机器学习,B ...
- python数据清洗工具、方法、过程整理归纳(一、数据清洗之常用工具——numpy,pandas)
文章目录 1 背景 2 常用工具 2.1 numpy 2.2 pandas 1 背景 数据清洗是整个数据分析过程的第一步,也是整个数据分析项目中最耗费时间的一步: 数据分析的过程决定了数据分析的准确性 ...
- python数据可视化工具 pandas_Pandas数据可视化工具——Seaborn用法整理(下)
在前一篇文章 Pandas数据可视化工具--Seaborn用法整理(上),我们了解了如何使用这些Seaborn代码绘制分布图和分类图.在本文中,我们将继续讨论Seaborn提供的一些其他以绘制不同类型 ...
- 数据基础---numpy、pandas使用教程
数组对象 Numpy库 Numpy最重要的一个特点是就是其N维数组对象,即ndarray,ndarray是一个通用的同构数据多维容器,其中的所有元素必须是相同类型的.每个数组都有一个shape(一个表 ...
- python panda用法_Python3 pandas用法大全
Python3 pandas用法大全 一.生成数据表 1.首先导入pandas库,一般都会用到numpy库,所以我们先导入备用: importnumpy as npimport pandas as p ...
- Python pandas用法
Python pandas用法 无味之味关注 12019.01.10 15:43:25字数 2,877阅读 91,914 介绍 在Python中,pandas是基于NumPy数组构建的,使数据预处理. ...
- Python 学习 —— Numpy 、Pandas 傻傻分不清楚
之前的文章里面谈到过,我从R转到Python上,一个很大的不习惯就是R的数据结构比较简单,但是Python的数据类型比较多,很容易就令人头脑混乱.但是今天学习了一下Udacity的课程,顿时就清楚多了 ...
- 【Python基础】101道Numpy、Pandas练习题,提升你的Python水平
无论是数据分析还是机器学习,数据的预处理必不可少.其中最常用.最基础的Python库非numpy和pandas莫属,很多初学者可能看了很多教程,但是很快就把用法忘光了. 光看不练假把式,今天向大家推荐 ...
- 【机器学习基础】前置知识(四):一文掌握Pandas用法
Pandas提供快速,灵活和富于表现力的数据结构,是强大的数据分析Python库. 本文收录于机器学习前置教程系列. 一.Series和DataFrame Pandas建立在NumPy之上,更多Num ...
最新文章
- 30分钟掌握ES6/ES2015核心内容
- 用户操作计算机系统的基本工具是什么,在Windows支持下,用户操作计算机系统的基本工具是______。...
- 当遭遇“用户增长”停滞,你应该怎么办?
- IE6下div宽高设置
- sublime快捷键收藏
- 谷歌浏览器的下载位置如何设置 Chrome浏览器下载路径设置方法简述
- 组件、局部的组件、表行组件、组件数据传递
- matlab 平行,MATLAB判断两条直线平行
- Rust: (作者 洛佳) 使用Rust编写操作系统(附录一):链接器参数
- vs2017官方下载地址
- deepin Linux 安装 tar,Linux Deepin 15.11安装更新firefox flash player
- Hankson 的趣味题
- python推理拟合函数
- Java8之深克隆与浅克隆
- Docker一键部署MySQL
- 语音识别第4讲:语音特征参数MFCC
- C++ 求最大公因数
- 供应链管理问题会否导致小米在印度市场受挫?
- FFMPEG+mp3+mp4转换
- IHE测试系列之一:胜利归来
热门文章
- access按职称计算平均年龄_新版全国二级ACCESS选择题第3套
- Delphi之东进数字语音卡(SS1)可复用源码 1
- Namo for Mac(DNS服务器配置工具)
- 供应商与企业签订的保密协议
- 中级财管电脑操作不会用计算机,2019年中级会计财务管理必背公式,计算机如何使用?开考必看!...
- 【最新评估报告】最先进的EDR并不完美,无法检测到常见的攻击
- 第一篇 初识庐山真面目 ——Unity 3D Shader(2)
- python定义函数实现判断年份是否为闰年
- js,e.pageX、pageY模态框拖动
- 概率统计基础(三):常见分布与假设检验