- 邏輯回歸的推導過程:https://blog.csdn.net/ACM_hades/article/details/90448785
- 代碼主要實現了下面公式: $W^{k+1}=W^k+\lambda X\,(Y-f_{W^k}(X^T))$
- 數據集 :我們選擇MNIST數據集進行實驗,它包含各種手寫數字(0-9)圖片,圖片大小28*28。MNIST數據集本身有10個類別,為了將其變成二分類問題我們進行如下處理:label等于0的繼續等于0,label大于0改為1。這樣就將十分類的數據改為二分類的數據。
- **特徵選擇**:可選擇的特徵有很多,包括:
- 自己提取特征
- 將整個圖片作為特征向量
- HOG特征
- 我們將整個圖片作為特征(784=28×28)。
import
time
import
numpy
as
np
import
pandas
as
pd
from
sklearn
.
model_selection
import
train_test_split
from
sklearn
.
metrics
import
accuracy_score
class Logistic:
    """Binary logistic regression trained with full-batch gradient ascent.

    Implements the update rule  W_{k+1} = W_k + alpha * X^T (Y - sigmoid(X W_k)).
    Callers are expected to append a constant-1 column to the data matrix so
    the last weight acts as the bias term (the driver script below does this).
    """

    def __init__(self, feature_len):
        """Initialise the (feature_len, 1) weight column vector to all ones."""
        self.weights = np.ones((feature_len, 1))

    def model_function(self, X):
        """Return sigmoid(X @ weights) as an (n_samples, 1) column.

        The sigmoid is evaluated piecewise for numerical stability:
        for z >= 0 use 1/(1+exp(-z)); for z < 0 use exp(z)/(1+exp(z)).
        Both branches only ever exponentiate a non-positive value, so
        np.exp cannot overflow regardless of the magnitude of z.
        """
        z = np.matmul(X, self.weights)
        # 0/1 masks selecting the non-negative and negative entries of z.
        # BUGFIX: `np.float` was removed in NumPy 1.24; use builtin float.
        pos_mask = (z >= 0).astype(float)
        neg_mask = (z < 0).astype(float)
        result_pos = 1.0 / (1 + np.exp(-pos_mask * z)) * pos_mask
        result_neg = np.exp(neg_mask * z) / (1 + np.exp(neg_mask * z)) * neg_mask
        return result_pos + result_neg

    def train(self, Data, label, alpha=0.01, max_iter=500):
        """Fit the weights by batch gradient ascent.

        Data     : (n_samples, feature_len) array.
        label    : 0/1 labels, any shape reshapeable to (n_samples, 1).
        alpha    : learning rate (default preserves original behaviour).
        max_iter : number of full-batch iterations (default preserves
                   original behaviour).
        """
        label = label.reshape((-1, 1))
        for _ in range(max_iter):
            pres = self.model_function(Data)
            error = label - pres  # residual between labels and predictions
            # W <- W + alpha * X^T (Y - sigmoid(X W))
            self.weights = self.weights + alpha * np.matmul(Data.T, error)

    def predict(self, Data):
        """Return hard 0/1 class labels as a flat (n_samples,) float array.

        BUGFIX: the original returned raw sigmoid probabilities; that only
        worked with sklearn's accuracy_score downstream because the trained
        margins happened to saturate to exactly 0.0/1.0.  Thresholding at
        0.5 makes the output genuine class labels in every case.
        """
        probs = self.model_function(Data).reshape(-1)
        return (probs >= 0.5).astype(float)
if __name__ == '__main__':
    # ---- load the dataset -------------------------------------------------
    print('Start read data')
    tic = time.time()
    frame = pd.read_csv('./lihang_book_algorithm-master/data/train_binary.csv')
    samples = frame.values
    print("data shape:", samples.shape)

    # column 0 holds the label; the remaining columns are the pixel features
    imgs = samples[:, 1:]
    labels = samples[:, 0]
    print("imgs shape:", imgs.shape)

    # append a constant-1 column so the last weight acts as the bias term
    bias_col = np.ones((imgs.shape[0], 1))
    imgs = np.concatenate((imgs, bias_col), axis=1)
    print("imgs shape:", imgs.shape)
    print("labels shape:", labels.shape)
    print("label:", list(set(labels)))

    Model = Logistic(imgs.shape[-1])

    # hold out 1/3 of the data for evaluation
    split = train_test_split(imgs, labels, test_size=0.33, random_state=23323)
    train_features, test_features, train_labels, test_labels = split
    print("train data count :%d" % len(train_labels))
    print("test data count :%d" % len(test_labels))
    print('read data cost ', time.time() - tic, ' second')

    # ---- train ------------------------------------------------------------
    print('Start training')
    tic = time.time()
    Model.train(train_features, train_labels)
    print('training cost ', time.time() - tic, ' second')

    # ---- evaluate ---------------------------------------------------------
    print('Start predicting')
    tic = time.time()
    test_predict = Model.predict(test_features)
    print('predicting cost ', time.time() - tic, ' second')

    score = accuracy_score(test_labels, test_predict)
    print("The accruacy socre is ", score)
結果:
Start read data
data shape: (42000, 785)
imgs_origin shape: (42000, 784)
imgs shape: (42000, 785)
labels shape: (42000,)
label: [0, 1]
train data count :28140
test data count :13860
read data cost 4.148890018463135 second
Start training
training cost 15.161401748657227 second
Start predicting
predicting cost 0.007978200912475586 second
The accruacy socre is 0.9892496392496393
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
