机器学习实战02 — K-近邻算法识别手写数字
Aug 06, 2019
按照书中内容写了一个手写数字识别demo.
尝试自己手写数字并拍了照片,将照片缩小成32*32,然后转成灰度图,再转成二值矩阵.
实际效果不太好,还需要优化.
先贴代码,日后改善.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import operator
from os import listdir
from PIL import Image
def main():
    """Print the start of one sample digit vector, then run the kNN test."""
    sampleVector = img2vector('./digits/testDigits/0_1.txt')
    # Quick sanity check: show the leading 31 pixels of the flattened image.
    print(sampleVector[0, 0:31])
    handwritingClassTest()
def img2vector(filename):
    """Load a 32x32 text image of 0/1 digits into a 1x1024 row vector.

    The first 32 characters of each of the first 32 lines of ``filename``
    are converted with ``int`` and stored row by row, so pixel (i, j)
    ends up at column ``32 * i + j``.

    Args:
        filename: Path of the text file to read.

    Returns:
        A NumPy array of shape (1, 1024) holding the pixel values.

    Raises:
        IndexError: If one of the first 32 lines has fewer than 32 characters.
        ValueError: If a character cannot be converted with ``int``.
    """
    returnVect = np.zeros([1, 1024])
    # The context manager closes the file even if parsing a line fails.
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect
def handwritingClassTest():
    """Evaluate the kNN classifier on the handwritten-digit data set.

    Every file in ``./digits/trainingDigits`` is loaded into one row of the
    training matrix; its label is the integer before the first underscore
    of the file name. Every file in ``./digits/testDigits`` is then
    classified with ``classify0`` (k = 3) and compared with the label taken
    from its own file name. The per-file result, the total number of errors
    and the error rate are printed.

    The error rate is printed as ``nan`` when the test directory is empty,
    instead of failing with a division by zero.
    """
    trainingFileList = listdir('./digits/trainingDigits')
    m = len(trainingFileList)
    print(f"file number = {m}")

    hwLabels = []
    trainingMat = np.zeros([m, 1024])
    for i, filenameStr in enumerate(trainingFileList):
        hwLabels.append(_labelFromFilename(filenameStr))
        trainingMat[i, :] = img2vector(f'./digits/trainingDigits/{filenameStr}')

    testFileList = listdir('./digits/testDigits')
    mTest = len(testFileList)
    errorCount = 0.0
    for filenameStr in testFileList:
        classNumStr = _labelFromFilename(filenameStr)
        vectorUnderTest = img2vector(f'./digits/testDigits/{filenameStr}')
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, 3)
        print(f"test={classifierResult}, real num={classNumStr}")
        if classifierResult != classNumStr:
            errorCount += 1

    print(f"total error num={errorCount}")
    # An error rate over zero samples is undefined; report nan, not a crash.
    errorRate = errorCount / float(mTest) if mTest else float('nan')
    print(f"total error rate is: {errorRate}")


def _labelFromFilename(filenameStr):
    """Return the integer label encoded in a file name such as ``3_12.txt``."""
    return int(filenameStr.split('.')[0].split('_')[0])
def classify0(inX, trainingDataSet, labels, k):
    """Classify ``inX`` by majority vote among its k nearest training rows.

    Distances are Euclidean. If ``k`` exceeds the number of training rows,
    all rows vote. When several labels share the highest vote count, the
    one whose first vote came from the nearest neighbour wins.

    Args:
        inX: The sample to classify; a vector (or single-row matrix) with
            as many values as ``trainingDataSet`` has columns.
        trainingDataSet: 2-D NumPy array with one training sample per row.
        labels: Sequence of labels; ``labels[i]`` belongs to row ``i``.
        k: Number of nearest neighbours that vote.

    Returns:
        The winning label, taken from ``labels``.

    Raises:
        ValueError: If ``trainingDataSet`` has no rows or ``k`` is below 1.
    """
    dataSetSize = trainingDataSet.shape[0]
    if dataSetSize == 0:
        raise ValueError("trainingDataSet must contain at least one row")
    if k < 1:
        raise ValueError("k must be at least 1")

    # Broadcasting subtracts every training row from inX, as if inX had
    # been tiled to the training set's row count: for inX = [a, b] and two
    # training rows the minuend behaves like [[a, b], [a, b]]. No copy of
    # inX is materialised.
    diffMat = inX - trainingDataSet
    sqDistances = (diffMat ** 2).sum(axis=1)
    distances = sqDistances ** 0.5
    sortedDistIndices = distances.argsort()

    classCount = {}
    # Clamp k so a small training set cannot cause an out-of-range index.
    for neighbourIndex in sortedDistIndices[:min(k, dataSetSize)]:
        voteILabel = labels[neighbourIndex]
        classCount[voteILabel] = classCount.get(voteILabel, 0) + 1

    # max() returns the first maximal item, and dicts keep insertion order,
    # so ties resolve exactly like a stable descending sort by vote count.
    return max(classCount.items(), key=operator.itemgetter(1))[0]
def pretreatment(filename):
ima = Image.open(filename)
ima = ima.resize((32, 32), Image.ANTIALIAS)
ima = ima.convert('L') # 转化为灰度图像
# ima.show()
data = ima.getdata()
im = np.array(ima) # 转化为二维数组
for i in range(im.shape[0]): # 转化为二值矩阵
for j in range(im.shape[1]):
if im[i, j]

机器学习实战02 1.19 MB
微信赞赏
支付宝赞赏