1 Star 0 Fork 53

fosterkong/Book5_Essentials-of-Probability-and-Statistics

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
Bk5_Ch13_01.py 13.37 KB
一键复制 编辑 原始数据 按行查看 历史
Visualize-ML 提交于 2022-12-06 13:10 +08:00 . Add files via upload
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583
###############
# Authored by Weisheng Jiang
# Book 6 | From Basic Arithmetic to Machine Learning
# Published and copyrighted by Tsinghua University Press
# Beijing, China, 2022
###############
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris
# Load the iris data twice: once through Seaborn, once through Scikit-learn
iris_sns = sns.load_dataset("iris")  # Seaborn copy
iris = load_iris()  # Scikit-learn copy

# Feature matrix and class labels come from the Scikit-learn copy
X, y = iris.data, iris.target

# Column labels (with mathtext symbols) for the four features
feature_names = [
    'Sepal length, $X_1$',
    'Sepal width, $X_2$',
    'Petal length, $X_3$',
    'Petal width, $X_4$',
]

# Wrap the feature array in a dataframe so pandas statistics can be used on it
X_df = pd.DataFrame(data=X, columns=feature_names)
#%% Heatmap of covariance matrix
# Covariance matrix of the four feature columns
SIGMA = X_df.cov()

fig, axs = plt.subplots()
h = sns.heatmap(SIGMA, ax=axs, cmap='rainbow', linewidths=.05,
                annot=True, fmt='.2f')
h.set_aspect("equal")
h.set_title('Covariance matrix')

# Inverse of the covariance matrix, drawn on a separate figure
SIGMA_inv = np.linalg.inv(SIGMA)

fig, axs = plt.subplots()
h = sns.heatmap(SIGMA_inv, ax=axs, cmap='rainbow', linewidths=.05,
                annot=True, fmt='.2f')
h.set_aspect("equal")
#%% compare covariance matrices given class label
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)

# One panel per class label; all panels share the same color limits
class_titles = ['Y = 0, setosa', 'Y = 1, versicolor', 'Y = 2, virginica']
class_panels = []
for label, (panel_ax, panel_title) in enumerate(zip((ax1, ax2, ax3),
                                                    class_titles)):
    # Covariance of the rows whose label equals `label`
    panel = sns.heatmap(X_df[y == label].cov(), cmap="rainbow",
                        annot=None, cbar=False, ax=panel_ax, square=True,
                        vmax=0.4, vmin=0)
    panel_ax.set_title(panel_title)
    class_panels.append(panel)

g1, g2, g3 = class_panels
#%% correlation matrix
# Pairwise correlation of the four feature columns
RHO = X_df.corr()

fig, axs = plt.subplots()
h = sns.heatmap(RHO, ax=axs, cmap='rainbow', linewidths=.05, annot=True)
h.set_aspect("equal")
h.set_title('Correlation matrix')
#%% compare correlation matrices given class labels
f, (ax1, ax2, ax3) = plt.subplots(1, 3, sharey=True)

# Keyword arguments common to the three per-class panels
corr_style = dict(cmap="rainbow", annot=False, cbar=False, square=True,
                  vmax=1, vmin=0.15)

# Correlation of the rows with label 0
g1 = sns.heatmap(X_df[y == 0].corr(), ax=ax1, **corr_style)
ax1.set_title('Y = 0, setosa')

# Correlation of the rows with label 1
g2 = sns.heatmap(X_df[y == 1].corr(), ax=ax2, **corr_style)
ax2.set_title('Y = 1, versicolor')

# Correlation of the rows with label 2
g3 = sns.heatmap(X_df[y == 2].corr(), ax=ax3, **corr_style)
ax3.set_title('Y = 2, virginica')
#%% SIGMA = D@P@D
# D is the diagonal matrix holding the square roots of SIGMA's diagonal entries
D = np.diag(np.sqrt(np.diag(SIGMA)))

# Seven side-by-side axes: four matrix panels separated by '=' and '@' symbols
fig, axs = plt.subplots(1, 7, figsize=(12, 3))

# All matrix panels use the same color limits (-1 to 2)
plt.sca(axs[0])
ax = sns.heatmap(SIGMA, cmap='rainbow',
                 vmin=-1, vmax=2,
                 cbar=False)
ax.set_aspect("equal")
# Raw string: '\S' is not a valid Python escape sequence
plt.title(r'$\Sigma$')

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
ax = sns.heatmap(D, cmap='rainbow',
                 vmin=-1, vmax=2,
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('D')

plt.sca(axs[3])
plt.title('@')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(RHO, cmap='rainbow',
                 vmin=-1, vmax=2,
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('P')

plt.sca(axs[5])
plt.title('@')
plt.axis('off')

plt.sca(axs[6])
ax = sns.heatmap(D, cmap='rainbow',
                 vmin=-1, vmax=2,
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('D')
#%% Eigen decomposition of covariance matrix
# LAMBDA_ holds the eigenvalues, the columns of V the matching eigenvectors
# NOTE(review): np.linalg.eig does not promise any ordering of the eigenvalues;
# SIGMA is a covariance matrix, so np.linalg.eigh may be the better fit - confirm
LAMBDA_, V = np.linalg.eig(SIGMA)
LAMBDA = np.diag(LAMBDA_)  # eigenvalues placed on the diagonal of a matrix

# Figure 1: SIGMA = V @ LAMBDA @ V^T
fig, axs = plt.subplots(1, 7, figsize=(12, 3))

plt.sca(axs[0])
ax = sns.heatmap(SIGMA, cmap='rainbow', cbar=False)
ax.set_aspect("equal")
# Raw strings below: '\S' and '\L' are not valid Python escape sequences
plt.title(r'$\Sigma$')

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
ax = sns.heatmap(V, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V$')

plt.sca(axs[3])
plt.title('@')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(LAMBDA, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title(r'$\Lambda$')

plt.sca(axs[5])
plt.title('@')
plt.axis('off')

plt.sca(axs[6])
ax = sns.heatmap(V.T, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V^T$')

# Figure 2: V @ V^T, shown next to its two factors and titled as I
fig, axs = plt.subplots(1, 5, figsize=(12, 3))

plt.sca(axs[0])
ax = sns.heatmap(V @ V.T, cmap='rainbow', cbar=False,
                 vmax=2.5, vmin=0)
ax.set_aspect("equal")
plt.title('$I$')

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
ax = sns.heatmap(V, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V$')

plt.sca(axs[3])
plt.title('@')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(V.T, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V^T$')
#%% Projection
# Subtract the column means, then multiply the centered data by V
X_df_centered = X_df - X_df.mean()
Yc = X_df_centered @ V

# Yc^T @ Yc is printed and then drawn as a heatmap
gram_Yc = Yc.T @ Yc
print(gram_Yc)

fig, axs = plt.subplots()
h = sns.heatmap(gram_Yc, ax=axs, cmap='rainbow', linewidths=.05)
h.set_aspect("equal")
#%% tensor products
# One figure per column of V, showing the product of that column with its transpose
for idx in range(4):
    # Column idx of V kept as a (rows, 1) column vector
    v_j = V[:, [idx]]
    tensor_prod = v_j @ v_j.T

    fig, ax = plt.subplots(figsize=(6, 6))
    ax = sns.heatmap(tensor_prod, ax=ax, cmap='rainbow', cbar=False,
                     vmax=1, vmin=-1)
    ax.set_aspect("equal")
    # Title uses 1-based numbering for the column
    ax.set_title(f'$v_{idx + 1} @ v_{idx + 1}^T$')
#%% spectral decomposition layers
# Accumulator for the sum of the layers. It is created with SIGMA's full 2-D
# shape so the running sum does not depend on broadcasting a 1-D array.
SIGMA_reprod = np.zeros(SIGMA.shape)

# Color limits taken from SIGMA once, so every layer is drawn on the same scale
layer_vmax = SIGMA.max().max()
layer_vmin = SIGMA.min().min()

for idx in range(len(LAMBDA_)):
    v_j = V[:, idx].reshape(-1, 1)  # column idx of V as a column vector
    lambda_j = LAMBDA_[idx]

    # Layer idx: lambda_j * v_j @ v_j^T
    tensor_prod = lambda_j * v_j @ v_j.T
    SIGMA_reprod = SIGMA_reprod + tensor_prod

    fig, ax = plt.subplots(figsize=(6, 6))
    ax = sns.heatmap(tensor_prod, cmap='rainbow', cbar=False,
                     vmax=layer_vmax, vmin=layer_vmin)
    ax.set_aspect("equal")
    # Title uses 1-based numbering; \u03BB is the Greek letter lambda
    plt.title(f'$\u03BB_{idx + 1}v_{idx + 1} @ v_{idx + 1}^T$')
#%% decomposition of covariance Matrix inverse
# Panels: SIGMA_inv = V @ inverse(LAMBDA) @ V^T
fig, axs = plt.subplots(1, 7, figsize=(12, 3))

plt.sca(axs[0])
ax = sns.heatmap(SIGMA_inv, cmap='rainbow', cbar=False)
ax.set_aspect("equal")
# Raw strings below: '\S' and '\L' are not valid Python escape sequences
plt.title(r'$\Sigma^{-1}$')

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
ax = sns.heatmap(V, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V$')

plt.sca(axs[3])
plt.title('@')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(np.linalg.inv(LAMBDA), cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title(r'$\Lambda^{-1}$')

plt.sca(axs[5])
plt.title('@')
plt.axis('off')

plt.sca(axs[6])
ax = sns.heatmap(V.T, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V^T$')
#%% Eigen decomposition of correlation matrix
# Eigenvalues (LAMBDA_P_) and eigenvectors (columns of V_P) of RHO
LAMBDA_P_, V_P = np.linalg.eig(RHO)
LAMBDA_P = np.diag(LAMBDA_P_)  # eigenvalues placed on the diagonal of a matrix

# Panels: P = V_Z @ LAMBDA_Z @ V_Z^T
fig, axs = plt.subplots(1, 7, figsize=(12, 3))

plt.sca(axs[0])
ax = sns.heatmap(RHO, cmap='rainbow', cbar=False)
ax.set_aspect("equal")
plt.title('$P$')

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
ax = sns.heatmap(V_P, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V_Z$')

plt.sca(axs[3])
plt.title('@')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(LAMBDA_P, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
# Raw string: '\L' is not a valid Python escape sequence
plt.title(r'$\Lambda_Z$')

plt.sca(axs[5])
plt.title('@')
plt.axis('off')

plt.sca(axs[6])
ax = sns.heatmap(V_P.T, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('$V_Z^T$')
#%% SVD, economic
# Center the data by subtracting each column's mean
X_c_df = X_df - X_df.mean()

# Reduced SVD (full_matrices=False); the third return value is transposed
# to obtain V, and the singular values are placed on the diagonal of S
U, S_, Vt = np.linalg.svd(X_c_df, full_matrices=False)
V = Vt.T
S = np.diag(S_)

# Panels: X_c = U @ S @ V^T
fig, axs = plt.subplots(1, 7, figsize=(12, 4))

ax = sns.heatmap(X_c_df, ax=axs[0], cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"})
ax.set_title('$X_c$')

axs[1].set_title('=')
axs[1].axis('off')

ax = sns.heatmap(U, ax=axs[2], cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"})
ax.set_title('$U$')

axs[3].set_title('@')
axs[3].axis('off')

ax = sns.heatmap(S, ax=axs[4], cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
ax.set_title('$S$')

axs[5].set_title('@')
axs[5].axis('off')

ax = sns.heatmap(V.T, ax=axs[6], cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
ax.set_title('$V^T$')
#%% projection Yc = Xc @ V
# Multiply the centered data by V
Y_c_df = X_c_df @ V

# Panels: Y_c = X_c @ V
fig, axs = plt.subplots(1, 5, figsize=(12, 4))

ax = sns.heatmap(Y_c_df, ax=axs[0], cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"})
ax.set_title('$Y_c$')

axs[1].set_title('=')
axs[1].axis('off')

ax = sns.heatmap(X_c_df, ax=axs[2], cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"})
ax.set_title('$X_c$')

axs[3].set_title('@')
axs[3].axis('off')

ax = sns.heatmap(V, ax=axs[4], cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
ax.set_title('$V$')
#%% relationship between lambda and singular values
# Squared singular values divided by (n - 1), where n is the number of rows of X_df
Lambda_reproduced = S**2 / (len(X_df) - 1)
# Difference to LAMBDA; a bare expression, so it has no effect when run as a script
Lambda_reproduced - LAMBDA # for test only

# Panels: LAMBDA = 1/(n-1) * S @ S
fig, axs = plt.subplots(1, 7, figsize=(12, 4))

plt.sca(axs[0])
ax = sns.heatmap(LAMBDA, cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"},
                 annot=True, fmt='.2f')
# Fixed title: the original '$/Lambda$' used a forward slash, so mathtext
# showed the literal text instead of the Greek capital lambda
plt.title(r'$\Lambda$')
ax.set_aspect("equal")

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
# 1x1 panel holding the scalar factor 1/(n-1)
ax = sns.heatmap(np.array([[1 / (len(X_df) - 1)]]),
                 cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"},
                 annot=True, fmt='.3f')
plt.title('$1/(n-1)$')
ax.set_aspect("equal")

plt.sca(axs[3])
plt.title('*')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(S, cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"},
                 annot=False, fmt='.2f')
plt.title('$S$')
ax.set_aspect("equal")

plt.sca(axs[5])
plt.title('@')
plt.axis('off')

plt.sca(axs[6])
ax = sns.heatmap(S, cmap='rainbow', yticklabels=False,
                 cbar_kws={"orientation": "horizontal"},
                 annot=False, fmt='.2f')
ax.set_aspect("equal")
plt.title('$S$')
#%% spectral decomposition of economic SVD
# Accumulator for the sum of the layers, sized from the centered data
# instead of a hard-coded (150, 4)
Xc_reprod = np.zeros(X_c_df.shape)

# Color limits taken from the centered data once, so every layer shares a scale
layer_vmax = X_c_df.max().max()
layer_vmin = X_c_df.min().min()

# One figure per singular value
for idx in range(len(S_)):
    v_j = V[:, idx].reshape(-1, 1)  # column idx of V as a column vector
    u_j = U[:, idx].reshape(-1, 1)  # column idx of U as a column vector
    s_j = S_[idx]

    # Layer idx: s_j * u_j @ v_j^T
    tensor_prod = s_j * u_j @ v_j.T
    Xc_reprod = Xc_reprod + tensor_prod

    fig, ax = plt.subplots(figsize=(6, 6))
    ax = sns.heatmap(tensor_prod, cmap='rainbow', cbar=False,
                     vmax=layer_vmax, vmin=layer_vmin)
    # The aspect ratio is left at its default for these panels
    # Title uses 1-based numbering
    plt.title(f'$s_{idx + 1}u_{idx + 1} @ v_{idx + 1}^T$')
#%% SIGMA_inv to Mahal distance sq
# Column means of X_df, reshaped into a column vector
MU = X_df.mean()
MU = np.array([MU]).T

# The query point x is the all-zeros vector with the same shape as MU
x = np.zeros_like(MU)

# d^2 = (x - MU)^T @ SIGMA_inv @ (x - MU)
d2 = (x - MU).T @ SIGMA_inv @ (x - MU)

# Panels: d^2 = (x - mu)^T @ SIGMA_inv @ (x - mu)
fig, axs = plt.subplots(1, 7, figsize=(12, 3))

plt.sca(axs[0])
# np.asarray replaces np.matrix, which NumPy no longer recommends;
# the heatmap only needs a plain 2-D array
ax = sns.heatmap(np.asarray(d2), cmap='rainbow', linewidths=.05, annot=True,
                 cbar_kws={"orientation": "horizontal"}, fmt='.2f')
ax.set_aspect("equal")
plt.title('$d^2$')

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
ax = sns.heatmap((x - MU).T, cmap='rainbow', linewidths=.05, annot=True,
                 cbar_kws={"orientation": "horizontal"}, fmt='.2f')
ax.set_aspect("equal")
# Raw strings below: '\m' and '\S' are not valid Python escape sequences
plt.title(r'(x - $\mu$)^T')

plt.sca(axs[3])
plt.title('@')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(SIGMA_inv, cmap='rainbow', linewidths=.05, annot=True,
                 cbar_kws={"orientation": "horizontal"}, fmt='.2f')
ax.set_aspect("equal")
plt.title(r'$\Sigma^{-1}$')

plt.sca(axs[5])
plt.title('@')
plt.axis('off')

plt.sca(axs[6])
ax = sns.heatmap((x - MU), cmap='rainbow', linewidths=.05, annot=True,
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title(r'(x - $\mu$)')
#%% Cholesky decomposition
import scipy.linalg

# Lower-triangular factor L and upper-triangular factor R of SIGMA
L = scipy.linalg.cholesky(SIGMA, lower=True)
R = scipy.linalg.cholesky(SIGMA, lower=False)

# Panels: SIGMA = L @ R
fig, axs = plt.subplots(1, 5, figsize=(12, 3))

plt.sca(axs[0])
ax = sns.heatmap(SIGMA, cmap='rainbow', cbar=False)
ax.set_aspect("equal")
# Raw string: '\S' is not a valid Python escape sequence
plt.title(r'$\Sigma$')

plt.sca(axs[1])
plt.title('=')
plt.axis('off')

plt.sca(axs[2])
ax = sns.heatmap(L, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('L')

plt.sca(axs[3])
plt.title('@')
plt.axis('off')

plt.sca(axs[4])
ax = sns.heatmap(R, cmap='rainbow',
                 cbar_kws={"orientation": "horizontal"})
ax.set_aspect("equal")
plt.title('R')
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
Python
1
https://gitee.com/fosterkong/Book5_Essentials-of-Probability-and-Statistics.git
git@gitee.com:fosterkong/Book5_Essentials-of-Probability-and-Statistics.git
fosterkong
Book5_Essentials-of-Probability-and-Statistics
Book5_Essentials-of-Probability-and-Statistics
main

搜索帮助