Python数据分析第三天 - NumPy详解2

# %load hello.py
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

plt.rcParams['font.sans-serif'].insert(0, 'SimHei')
plt.rcParams['axes.unicode_minus'] = False
# %config InlineBackend.figure_format = 'svg'

#广播机制
1. 两个数组后缘维度（数组对象的shape属性从后向前看对应的部分）相同，可以触发广播机制。
2. 两个数组后缘维度不相同，但是不相同的部分其中一方为1，也可以触发广播机制。

array1 = np.array([[0, 0, 0], [1, 1, 1], [2, 2, 2], [3, 3, 3]])
array2 = np.array([1, 2, 3])
array3 = np.array([[1],[2],[3],[4]])

array1 + array2

array([[1, 2, 3],
       [2, 3, 4],
       [3, 4, 5],
       [4, 5, 6]])

array1 +array3

array([[1, 1, 1],
       [3, 3, 3],
       [5, 5, 5],
       [7, 7, 7]])

array4 = np.random.randint(1,100,(3,1))
array5 = np.random.randint(1,100,(1,3))

array4

array([[71],
       [ 6],
       [30]], dtype=int32)

array5

array([[80, 23, 86]], dtype=int32)

array4 + array5

array([[151,  94, 157],
       [ 86,  29,  92],
       [110,  53, 116]], dtype=int32)

#通用一元函数

array6 = np.array([1,2,3,np.nan,4,5,np.inf,6,-np.inf])

array6[~np.isnan(array6)]

array([  1.,   2.,   3.,   4.,   5.,  inf,   6., -inf])

array6[~np.isinf(array6)]

array([ 1.,  2.,  3., nan,  4.,  5.,  6.])

array6[~np.isnan(array6) & ~np.isinf(array6)]

array([1., 2., 3., 4., 5., 6.])

# Sample 120 evenly spaced points on [0.1, 9.9]; the interval starts above 0
# because both logarithms are undefined at 0.
x = np.linspace(0.1, 9.9, 120)
y1 = np.sin(x)
y2 = np.log(x)
y3 = np.log10(x)

# Give every curve a label so the legend can tell the three lines apart
# (calling legend() without labelled artists only produces a warning).
plt.plot(x, y1, label='sin(x)')
plt.plot(x, y2, label='ln(x)')
plt.plot(x, y3, label='log10(x)')
plt.legend()
plt.show()

#面试官：普通函数如何使其像NumPy的函数一样进行矢量化操作
#答：NumPy中有一个vectorize函数可以对普通函数进行矢量化处理，它可以作为装饰器使用。

#面试官：什么是装饰器？
# 答：用一个函数去装饰另外一个函数或类，并为其提供额外的功能。

import random
import time

# A decorator takes the function being decorated as its argument and
# returns a new function that carries the extra (decorating) behaviour.
def record_time(func):
    """Decorator that prints how long each call to *func* takes.

    Args:
        func: The callable to be timed.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        *func*, prints the elapsed time once the call returns, and hands
        back whatever *func* returned.  If *func* raises, the exception
        propagates and nothing is printed.
    """
    # Imported locally so this block needs no new file-level import.
    from functools import wraps

    @wraps(func)  # keep the decorated function's __name__ / __doc__
    def wrapper(*args, **kwargs):
        # perf_counter() is monotonic, so the measured interval cannot be
        # distorted by system clock adjustments the way time.time() can.
        start = time.perf_counter()
        result = func(*args, **kwargs)
        end = time.perf_counter()
        print(f'消耗时间: {end - start:.3f}秒')
        return result

    return wrapper


@record_time
def download(filename):
    """Simulate downloading *filename*.

    Prints a start message, sleeps for a random 2-7 seconds to stand in
    for the transfer, then prints a completion message.
    """
    print(f'开始下载{filename}.')
    pause = random.random() * 5 + 2
    time.sleep(pause)
    print(f'{filename}下载完成.')


@record_time
def upload(filename):
    """Simulate uploading *filename*.

    Prints a start message, sleeps for a random 3-9 seconds to stand in
    for the transfer, then prints a completion message.
    """
    print(f'开始上传{filename}.')
    pause = random.random() * 6 + 3
    time.sleep(pause)
    print(f'{filename}上传完成.')


# download = record_time(download)
# upload = record_time(upload)
download('MySQL从删库到跑路.avi')
upload('Python从入门到住院.pdf')

开始下载MySQL从删库到跑路.avi.
MySQL从删库到跑路.avi下载完成.
消耗时间: 6.043秒
开始上传Python从入门到住院.pdf.
Python从入门到住院.pdf上传完成.
消耗时间: 6.707秒

#递归调用
from math import factorial as fac
@np.vectorize
def fac(n):
    """Return n! for a non-negative whole number.

    Because of the ``np.vectorize`` decorator the function also accepts
    arrays and is then applied element by element.

    Args:
        n: A non-negative whole number (or, through the decorator, an
            array of them).

    Returns:
        The factorial of ``n``.

    Raises:
        ValueError: If ``n`` is negative or not a whole number; such input
            would otherwise recurse until the interpreter's recursion
            limit is hit.
    """
    if n < 0 or n % 1 != 0:
        raise ValueError(f'fac() requires a non-negative whole number, got {n!r}')

    def _factorial(k):
        # Recurse on a plain scalar helper: recursing through ``fac`` itself
        # would re-enter the np.vectorize wrapper at every level.
        return 1 if k == 0 else k * _factorial(k - 1)

    return _factorial(n)
fac(5)

array(120)

from functools import lru_cache

@lru_cache(maxsize=128)
def fib(n):
    """Return the n-th Fibonacci number, where fib(1) == fib(2) == 1.

    Results are memoised by ``lru_cache`` so each value is computed once
    instead of being recomputed by every recursive branch.

    Args:
        n: Position in the sequence; a whole number >= 1.

    Returns:
        The n-th Fibonacci number.

    Raises:
        ValueError: If ``n`` is smaller than 1 or not a whole number; the
            recursion would otherwise never reach a base case.
    """
    if n < 1 or n % 1 != 0:
        raise ValueError(f'fib() requires a whole number >= 1, got {n!r}')
    if n in {1, 2}:
        return 1
    return fib(n - 1) + fib(n - 2)

for x in range(1,21):
    print(x,fib(x))

array7 = np.arange(1,10)
#fac = np.vectorize(fac)
fac(array7)

array([     1,      2,      6,     24,    120,    720,   5040,  40320,
       362880])

#通用二元函数

#NumPy其他函数

temp = np.random.randint(1,10,50)
temp

array([9, 2, 9, 2, 4, 9, 2, 9, 5, 8, 7, 3, 1, 5, 7, 9, 1, 9, 6, 5, 1, 1,
       6, 4, 2, 7, 6, 3, 8, 9, 2, 3, 6, 9, 1, 1, 1, 8, 7, 8, 1, 1, 2, 8,
       9, 1, 2, 5, 2, 5], dtype=int32)

np.unique(temp)

array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)

#堆叠

array8 = np.array([[1,1,1],[2,2,2],[3,3,3]])
array9 = np.array([[4,4,4],[5,5,5],[6,6,6]])

np.hstack((array8,array9))

array([[1, 1, 1, 4, 4, 4],
       [2, 2, 2, 5, 5, 5],
       [3, 3, 3, 6, 6, 6]])

np.vstack((array8,array9))

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3],
       [4, 4, 4],
       [5, 5, 5],
       [6, 6, 6]])

np.stack((array8,array9))

array([[[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]],

       [[4, 4, 4],
        [5, 5, 5],
        [6, 6, 6]]])

#拆分

temp4 = np.hstack((array8,array9))

np.hsplit(temp4,3)

[array([[1, 1],
        [2, 2],
        [3, 3]]),
 array([[1, 4],
        [2, 5],
        [3, 6]]),
 array([[4, 4],
        [5, 5],
        [6, 6]])]

temp5 = np.vstack((array8,array9))

np.vsplit(temp5,3)

[array([[1, 1, 1],
        [2, 2, 2]]),
 array([[3, 3, 3],
        [4, 4, 4]]),
 array([[5, 5, 5],
        [6, 6, 6]])]

temp6 = np.random.randint(10,100,9)
temp6

array([91, 95, 78, 30, 19, 41, 63, 21, 93], dtype=int32)

np.append(temp6,[999,888])

array([ 91,  95,  78,  30,  19,  41,  63,  21,  93, 999, 888])

np.insert(temp6,2,[999,888])

array([ 91,  95, 999, 888,  78,  30,  19,  41,  63,  21,  93], dtype=int32)

np.insert(temp6,[2,-1],[999,888])

array([ 91,  95, 999,  78,  30,  19,  41,  63,  21, 888,  93], dtype=int32)

#按条件抽取元素（相当于布尔索引）

#按条件抽取元素
np.extract(temp6 > 60 ,temp6)

array([91, 95, 78, 63, 93], dtype=int32)

temp6

#根据多组条件分别对元素进行对应的处理
np.select([temp6<40,temp6>70],[temp6*2,temp6//2],50)  #([条件1,条件2],[事件1,事件2],else的结果)

array([45, 47, 39, 60, 38,  0,  0, 42, 46], dtype=int32)

#对满足和不满足的条件分别处理元素
np.where(temp6 < 50 , temp6 *10 , temp6 // 10)  # 条件 ，满足，不满足

array([  9,   9,   7, 300, 190, 410,   6, 210,   9], dtype=int32)

temp7 = np.roll(temp6,-1)
temp7

array([95, 78, 30, 19, 41, 63, 21, 93, 91], dtype=int32)

temp6

array([91, 95, 78, 30, 19, 41, 63, 21, 93], dtype=int32)

np.resize(temp6,(4,4))  # 改变数组的大小，拿源数据依次填充直到填满

array([[91, 95, 78, 30],
       [19, 41, 63, 21],
       [93, 91, 95, 78],
       [30, 19, 41, 63]], dtype=int32)

np.put(temp6,[3,4],[999,888]) #根据索引 直接修改源数据

temp6

array([ 91,  95,  78, 999, 888,  41,  63,  21,  93], dtype=int32)

np.place(temp6, temp6 == 999,30)
np.place(temp6, temp6 == 888,19)   #将刚才替换的999 888 改回原来的值

temp6

array([91, 95, 78, 30, 19, 41, 63, 21, 93], dtype=int32)

guido = plt.imread('wenjian/py.jpg')
guido

a, b, c = np.vsplit(guido[:480,:480],3)
a,b,c

guido_parts = np.array([np.hsplit(a,3),np.hsplit(b,3),np.hsplit(c,3)])
guido_parts.shape

(3, 3, 160, 160, 3)

plt.figure(figsize=(6,6))
plt.subplots_adjust(wspace=0.05,hspace=0.05)
for i in range(3):
    for j in range(3):
        plt.subplot(3,3,i*3+j+1)
        plt.imshow(guido_parts[i,j])
        plt.axis('off')
plt.show()

obama = plt.imread('wenjian/obama.jpg')
obama.shape

(750, 500, 3)

temp8 = (guido * 0.3 + obama * 0.7).astype('u1')
plt.imshow(temp8)

<matplotlib.image.AxesImage at 0x2be169f3a10>

#向量和行列式

u = np.array([5,1,3])
m1 = np.array([4,5,1])
m2 = np.array([5,1,5])

np.dot(u,m1) / (np.linalg.norm(u) * np.linalg.norm(m1))

np.float64(0.7302967433402215)

np.dot(u,m2) / (np.linalg.norm(u) * np.linalg.norm(m2))

np.float64(0.9704311900788593)

np.rad2deg(np.arccos(0.7302967433402215))

np.float64(43.08872313536282)

np.rad2deg(np.arccos(0.9704311900788593))

np.float64(13.967881205170064)

temp9 = np.random.randint(10,100,(3,3))
temp9

array([[16, 15, 90],
       [68, 87, 86],
       [79, 27, 32]], dtype=int32)

#行列式
np.linalg.det(temp9).round(0)

np.float64(-376668.0)

#矩阵

m1 = np.array([[1,0,2],[-1,3,1]])

m2 = np.array([[3,1],[2,1],[1,0]])

#矩阵乘法
m1 @ m2

array([[5, 1],
       [4, 2]])

#矩阵乘法
m3 = np.matmul(m1,m2)
m3

array([[5, 1],
       [4, 2]])

#转置
m3.T

array([[5, 4],
       [1, 2]])

#逆矩阵
m4 = np.linalg.inv(m3)
m4

array([[ 0.33333333, -0.16666667],
       [-0.66666667,  0.83333333]])

m3 @ m4

array([[ 1.00000000e+00, -1.11022302e-16],
       [ 0.00000000e+00,  1.00000000e+00]])

np.allclose(m3 @ m4 ,np.eye(2))

True

A = np.array([[1,2,1],[3,7,2],[2,2,1]])
B = np.array([8,23,9]).reshape(-1,1)

np.linalg.matrix_rank(A)

np.int64(3)

np.linalg.matrix_rank(np.hstack((A,B)))

np.int64(3)

np.linalg.inv(A) @ B

array([[1.],
       [2.],
       [3.]])

np.linalg.solve(A,B)

array([[1.],
       [2.],
       [3.]])

from scipy.ndimage import zoom,rotate,sobel

temp11 = zoom(guido,(0.5,1.5,1))

plt.imshow(temp11)

<matplotlib.image.AxesImage at 0x2be350e49e0>

temp12 = rotate(guido,30)
plt.imshow(temp12)

<matplotlib.image.AxesImage at 0x2be35082f60>

plt.imshow(sobel(guido))   #通过颜色对比物体边缘

<matplotlib.image.AxesImage at 0x2be3552aa50>

函数    说明
diag  以一维数组的形式返回方阵的对角线元素或将一维数组转换为方阵（非对角线元素为0）
matmul  矩阵乘法运算
trace 计算对角线元素的和
norm 求矩阵或向量的范数
det 计算行列式的值
matrix_rank 计算矩阵的秩
eig 计算矩阵的特征值（eigenvalue）和特征向量（eigenvector）
inv 计算非奇异矩阵（$n$ 阶方阵）的逆矩阵
pinv 计算矩阵的摩尔-彭若斯（Moore-Penrose）广义逆
qr QR分解（把矩阵分解成一个正交矩阵与一个上三角矩阵的积）
svd 计算奇异值分解（singular value decomposition）
solve 解线性方程组 $\boldsymbol{A}\boldsymbol{x}=\boldsymbol{b}$，其中 $\boldsymbol{A}$ 是一个方阵
lstsq 计算 $\boldsymbol{A}\boldsymbol{x}=\boldsymbol{b}$ 的最小二乘解

#多项式

p1 = np.poly1d([3, 2, 1])
p2 = np.poly1d([1, 2, 3])
print(p1)
print(p2)

   2
3 x + 2 x + 1
   2
1 x + 2 x + 3

print(p1 + p2)

   2
4 x + 4 x + 4

print(p1 * p2)

   4     3      2
3 x + 8 x + 14 x + 8 x + 3

print(p1.deriv())

 
6 x + 2

print(p1.integ())

   3     2
1 x + 1 x + 1 x

print(p1.integ().deriv())

   2
3 x + 2 x + 1

from numpy.polynomial import Polynomial

p1 = Polynomial([1,2,3])
p1

p2 = Polynomial([3,2,1])
p2

p1 * p2

(p1*p2).deriv()

x = np.array([
 25000, 15850, 15500, 20500, 22000, 20010, 26050, 12500, 18500, 27300,
 15000, 8300, 23320, 5250, 5800, 9100, 4800, 16000, 28500, 32000,
 31300, 10800, 6750, 6020, 13300, 30020, 3200, 17300, 8835, 3500
])
y = np.array([
 2599, 1400, 1120, 2560, 1900, 1200, 2320, 800, 1650, 2200,
 980, 580, 1885, 600, 400, 800, 420, 1380, 1980, 3999,
 3800, 725, 520, 420, 1200, 4020, 350, 1500, 560, 500
])

#计算相关系数

np.corrcoef(x,y)

array([[1.        , 0.92275889],
       [0.92275889, 1.        ]])

plt.scatter(x,y)
plt.show()

a,b =np.polyfit(x,y,deg=1)
a,b

(np.float64(0.1103337164602619), np.float64(-294.8834372455541))

x1 = np.linspace(0,35000,120)
y1 = a * x1 + b

plt.scatter(x,y)
plt.plot(x1,y1,color='r',linewidth=4)
plt.show()