From 4856fb5f4755a2296b71fba04c4e5c2906ed9b06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=A2=81=E9=94=A6=E6=B6=9B?= <1971962997@qq.com>
Date: Thu, 20 Jan 2022 06:38:14 +0000
Subject: [PATCH] =?UTF-8?q?add=20=E7=AC=AC=E5=9B=9B=E6=AC=A1=E4=BD=9C?=
 =?UTF-8?q?=E4=B8=9A.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ...3\233\346\254\241\344\275\234\344\270\232" | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 "\347\254\254\345\233\233\346\254\241\344\275\234\344\270\232"

diff --git "a/\347\254\254\345\233\233\346\254\241\344\275\234\344\270\232" "b/\347\254\254\345\233\233\346\254\241\344\275\234\344\270\232"
new file mode 100644
index 0000000..81c4df4
--- /dev/null
+++ "b/\347\254\254\345\233\233\346\254\241\344\275\234\344\270\232"
@@ -0,0 +1,81 @@
+import matplotlib.pyplot as plt
+from numpy.ma.core import set_fill_value 
+from sklearn.datasets import load_digits
+import numpy as np
+
+class LogReg:  # one-vs-rest L2-regularized logistic regression on the scikit-learn digits data, fit by batch gradient descent
+    def __init__(self, alpha, lam, iter):  # alpha: step size; lam: L2 weight; iter: steps per class. NOTE(review): `iter` shadows the builtin
+        digits = load_digits()  # load_digits is imported from sklearn.datasets
+        self.image = digits.images  # stored, but not read anywhere else in this class
+        #print(self.digits.data.shape)
+        self.train_size = int(digits.data.shape[0]*0.9)  # row count of the first 90% of the data
+        self.train = digits.data[:self.train_size][:]  # training rows; the trailing [:] is just a full slice
+        self.test = digits.data[self.train_size:][:]  # remaining rows are held out for testing
+        self.train = self.train.astype(np.float64)  # the test rows are cast later, in preprocessing()
+        self.target = digits.target[:self.train_size]  # training labels; preprocessing() replaces them with one-hot rows
+        self.test_target = digits.target[self.train_size:]  # held-out labels, compared against argmax in predict()
+
+        self.theta = np.zeros((10,64))# (classes, 8*8 pixels): one weight row per class, no separate bias term
+        self.alpha = alpha
+        self.lam = lam
+        self.iter = iter
+
+
+    def sigmoid(self, x):  # x is passed straight to np.exp; does not read instance state
+        return 1 / (1 + np.exp(-x))  # logistic sigmoid 1 / (1 + e^-x)
+    
+    def OneHot(self,data):  # data: class index used to index a length-10 vector
+        tmp = np.zeros((10,))
+        tmp[data] = 1
+        return tmp  # zeros everywhere except a 1 at position `data`
+
+    def preprocessing(self):  # must run before regression(): Costfunction indexes self.target as a 2-D array
+        self.train = np.array([(i-np.min(i))/(np.max(i) - np.min(i)) for i in self.train]).astype(np.float64)# min-max normalize each row; NOTE(review): divides by zero when a row is constant
+        self.test = np.array([(i-np.min(i))/(np.max(i) - np.min(i)) for i in self.test]).astype(np.float64)# same per-row min-max normalization for the held-out rows
+        self.target = np.array([self.OneHot(i) for i in self.target]).astype(np.float64)# one-hot encode the training labels
+    
+    def Costfunction(self,i):  # returns (cost, grad) for the classifier of class i
+        # i selects row i of self.theta and column i of self.target (one-hot after preprocessing())
+        theta = self.theta[i, :].reshape(self.theta.shape[1], 1)  # weights of class i as a column vector
+        label = self.target[:,i]  # column i of the one-hot targets: 1 where the row belongs to class i
+        data = self.train
+
+        h = self.sigmoid(data.dot(theta))  # one sigmoid output per training row, as a column
+        # regularized cross-entropy cost; the continuation line adds the L2 penalty lam/(2m) * theta^T theta
+        cost = (-1/self.train_size) * (label.T.dot(np.log(h))+(1-label).T.dot(np.log(1-h)))\
+                     + (self.lam/(2*self.train_size)) * theta.T.dot(theta)  
+ 
+        # compute the gradient
+        grad = (1 / self.train_size) * data.T.dot(h - label.reshape(-1,1))  # label reshaped to a column so it lines up with h
+        # print(h)
+        grad = grad + (self.lam / self.train_size * theta) # add the gradient of the L2 penalty
+        # print(grad.shape)
+        return cost, grad  # grad is a column vector; regression() transposes it before the update
+    
+    def regression(self):  # fits self.theta in place and returns it
+        for i in range(10):  # the 10 per-class classifiers are trained one after another
+            for j in range(self.iter):  # self.iter gradient-descent steps over the full training set
+                [J, grad] = self.Costfunction(i)
+                self.theta[i, :] = self.theta[i, :] - self.alpha * grad.T  # step of size alpha against the gradient
+            print(J)  # cost from the last step for class i; NOTE(review): J is unbound here if self.iter == 0
+        # print(self.theta)
+        return self.theta
+
+    def predict(self):  # prints test accuracy; returns nothing
+    	# calc 10 Hypothesis functions and select the max one
+    	# use argmax(1) to get the index of max_val of each row
+        pred = self.sigmoid(self.test.dot(self.theta.T)).argmax(1)
+        num = 0  # count of held-out rows whose predicted index equals the label
+        for i in range(len(self.test_target)):
+            if(pred[i] == self.test_target[i]):
+                num += 1
+        print(float(num)/float(len(self.test_target))*100.0,"%")  # accuracy on the held-out rows, in percent
+
+def main():  # build the model, train it, and print the held-out accuracy
+    LR = LogReg( 1, 0.1, 3000)  # positional arguments are alpha=1, lam=0.1, iter=3000
+    LR.preprocessing()  # normalize the features and one-hot encode the training labels
+    LR.regression()  # fit theta; prints the final cost of each class
+    LR.predict()  # prints the accuracy on the held-out rows in percent
+
+if __name__ == "__main__":  # run the demo only when the file is executed as a script
+    main()
\ No newline at end of file
-- 
Gitee