Extracting Features from MNIST_784 and Classifying Them


  1. Try extracting some features from the MNIST handwritten-digit database (https://yann.lecun.com/exdb/mnist/), training on a subset of the data (e.g. 10 images per digit) and classifying the test data (10 further images per digit) with some classification algorithm; check the classification accuracy you obtain.
  2. What are the advantages and disadvantages of sliding-window object detection and of region-proposal object detection?
    Answer:
    1. Sliding-window object detection:
      • Advantages: compared with other approaches it is more likely to localize the target exactly and miss fewer objects, and it is relatively simple to implement
      • Disadvantages: the search space is large and many windows have to be scanned, so the computational cost is high; the sheer number of windows to classify rules out complex features and classifiers (a minimal sketch of the window scan follows this list)
    2. Region-proposal object detection:
      • Advantages: selective search can find all regions likely to contain an object; it is fast and achieves high recall
      • Disadvantages: the algorithm itself is more complex
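To make the window-count argument concrete, here is a minimal sketch of a single-scale sliding-window scan. classify_window is a hypothetical scorer standing in for any feature extractor plus classifier; the window size, step, and threshold are illustrative only:

import numpy as np

def sliding_window_detect(img: np.ndarray, classify_window, win=32, step=8):
    # Scan every window position at one scale. The number of windows grows
    # roughly as (h*w)/step**2 (more with multiple scales), which is why the
    # per-window classifier has to stay cheap.
    detections = []
    h, w = img.shape[:2]
    for y in range(0, h - win + 1, step):
        for x in range(0, w - win + 1, step):
            score = classify_window(img[y:y + win, x:x + win])
            if score > 0.5:  # illustrative confidence threshold
                detections.append((x, y, win, win, score))
    return detections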

A quick review of the kinds of image features:
color features, edge/boundary features, interest-point features, texture features, shape features.
The simpler ones are color features and edge features.
First, build the dataset and define the feature-computation methods, sampling as the problem statement requires.

Dataset:
import random

import cv2 as cv
import numpy as np
from sklearn.datasets import fetch_openml

# `rng` was undefined in the original listing; assume the standard-library
# random module with a fixed seed so the sampled subsets are reproducible
rng = random.Random(777)

class DataSet(object):
    def __init__(self,train_num=10,test_num=10) -> None:
        self.__train_num=train_num
        self.__test_num=test_num
        # 70000 flattened 28x28 grayscale images; labels are the strings '0'..'9'
        self.X, self.y = fetch_openml("mnist_784", return_X_y=True, as_frame=False, parser="pandas",data_home='./data',cache=True)
        self.num2img = {}
        self.selectPartial()

    def setArgs(self,train_num,test_num):
        self.__train_num=train_num
        self.__test_num=test_num
        self.selectPartial()

    def calNum2img(self):
        # rebuild the label -> image-index mapping from scratch; appending to
        # the old lists on repeated calls (e.g. via setArgs) would create
        # duplicate indices and could leak samples between train and test
        self.num2img = {}
        for i,y in enumerate(self.y):
            self.num2img.setdefault(y,[]).append(i)

    def selectPartial(self):
        self.calNum2img()
        trainl = []
        testl = []
        for i in range(10):
            # draw train_num+test_num distinct samples per digit, then split
            samples = rng.sample(self.num2img[str(i)],self.__train_num+self.__test_num)
            trainl+=samples[:self.__train_num]
            testl+=samples[self.__train_num:]
        self.__trainl = trainl
        self.__testl = testl
        self.setDefaultFea()

    def setDefaultFea(self):
        # (re)load the raw pixels for the current split; this undoes
        # CannyFeas / BinaryFeas without drawing a new sample
        self.train_partial_x = self.X[self.__trainl,]
        self.test_partial_x = self.X[self.__testl,]
        y_f = np.copy(self.y).reshape([-1,1])
        self.train_partial_y = y_f[self.__trainl,]
        self.test_partial_y = y_f[self.__testl,]

    # Canny edge features
    def CannyFeas(self):
        self.train_partial_x = np.apply_along_axis(self.CannyEdge,1,self.train_partial_x)
        self.test_partial_x = np.apply_along_axis(self.CannyEdge,1,self.test_partial_x)

    # binarize the gray values; arguably a color feature of the image
    def BinaryFeas(self):
        self.train_partial_x = np.apply_along_axis(self.BinarizeImg,1,self.train_partial_x)
        self.test_partial_x = np.apply_along_axis(self.BinarizeImg,1,self.test_partial_x)

    @staticmethod
    def BinarizeImg(mat:np.ndarray):
        # threshold at the image mean: pixels above the mean become 1, the rest 0
        return (mat > mat.mean()).astype(np.uint8)

    @staticmethod
    def CannyEdge(mat:np.ndarray):
        # reshape the flat 784-vector back to 28x28 for Canny, then flatten
        # the edge map again so the classifiers see fixed-length vectors
        return cv.Canny(mat.reshape([28,28]).astype('uint8'),10,10,L2gradient=True).reshape([-1,])

Notes:
Attributes
- X, y: the full dataset, 70000 samples in total; shapes: X (70000, 784), y (70000,)
- num2img: a dict mapping each label to the list of indices of its images (indices into X/y); ten keys, e.g. '0': [0, 1, 2, 3, 4, 5]
- train_partial_x, train_partial_y: the training set and its labels; note the shapes are (train_size, 784) and (train_size, 1), so the labels may need extra handling
- test_partial_x, test_partial_y: the test set and its labels; same shape caveat as above
Methods
- setArgs: set the sample sizes; the parameters are, in order, the training-set size and the test-set size
- CannyFeas: no parameters; use Canny edge features for training and testing
- BinaryFeas: no parameters; use binarized images for training and testing
- setDefaultFea: no parameters; undo the effect of the two methods above and go back to the raw images for training and testing
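
A quick sanity check of the attributes above; the shapes follow from the documentation (the exact samples drawn depend on the random seed):

ds = DataSet(train_num=10,test_num=10)
print(ds.X.shape, ds.y.shape)        # (70000, 784) (70000,)
print(ds.train_partial_x.shape)      # (100, 784): 10 digits x 10 images each
print(ds.train_partial_y.shape)      # (100, 1), hence the reshape in the classifiers
ds.CannyFeas()                       # switch to Canny edge features in place
ds.setDefaultFea()                   # back to the raw pixels, same split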

Classifiers:

These mostly just wrap the algorithms from sklearn. Each method returns the classification accuracy on the test set.

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, NearestCentroid
from sklearn.svm import LinearSVC

class Classifier_me(object):
    def __init__(self,dataset:DataSet) -> None:
        self.train_x = dataset.train_partial_x
        self.train_y = dataset.train_partial_y.reshape([-1,])  # flatten (n,1) labels to (n,)
        self.test_x = dataset.test_partial_x
        self.test_y = dataset.test_partial_y.reshape([-1,])

class DistanceBasedClassifier(Classifier_me):
    def Centroid(self):
        # nearest class mean under the Euclidean metric
        nc = NearestCentroid(metric='euclidean')
        nc.fit(self.train_x,self.train_y)
        y_pre = nc.predict(self.test_x)
        return accuracy_score(self.test_y,y_pre)

    def Nearest(self,k):
        # k-nearest-neighbors vote; k=1 is the nearest-sample rule
        nn = KNeighborsClassifier(n_neighbors=k)
        nn.fit(self.train_x,self.train_y)
        y_pre = nn.predict(self.test_x)
        return accuracy_score(self.test_y,y_pre)

    @staticmethod
    def MahalanobisDistance(vs:np.ndarray,x):
        # squared Mahalanobis distance of x from the rows of vs, using only
        # the diagonal of the covariance (per-feature variances); not used by
        # the classifiers above, kept for reference
        c = np.cov(vs,rowvar=False)  # rows of vs are samples
        m = np.mean(vs,axis=0)
        s = np.diag(c)
        return np.sum((x-m)**2/s)

class BayesBasedClassifier(Classifier_me):
    def Gaussian(self):
        gn = GaussianNB()
        gn.fit(self.train_x,self.train_y)
        y_pre = gn.predict(self.test_x)
        return accuracy_score(self.test_y,y_pre)

class LinearClassifier(Classifier_me):
    def SVC(self):
        svm = LinearSVC(random_state=777)
        svm.fit(self.train_x,self.train_y)
        y_pre = svm.predict(self.test_x)
        return accuracy_score(self.test_y,y_pre)

    def LDA(self):
        lda = LinearDiscriminantAnalysis()
        lda.fit(self.train_x,self.train_y)
        y_pre = lda.predict(self.test_x)
        return accuracy_score(self.test_y,y_pre)
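
For reference, the full Mahalanobis distance that MahalanobisDistance simplifies is

$$ d_M(x) = \sqrt{(x-\mu)^\top \Sigma^{-1} (x-\mu)}, $$

with mean $\mu$ and covariance $\Sigma$ estimated from the rows of vs. Keeping only the diagonal of $\Sigma$ reduces the squared distance to $\sum_j (x_j-\mu_j)^2/\sigma_j^2$, which avoids inverting a 784x784 covariance matrix estimated from only a handful of samples per class.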

Test:
mnist_100x100 = DataSet(train_num=100,test_num=100)
# use the binarized images here
mnist_100x100.BinaryFeas()
# mnist_100x100.setDefaultFea()
# mnist_100x100.CannyFeas()
distance = DistanceBasedClassifier(mnist_100x100)
bayes = BayesBasedClassifier(mnist_100x100)
linear = LinearClassifier(mnist_100x100)
print("Nearest centroid (class means):\n",distance.Centroid())
print("Nearest neighbor (k=1):\n",distance.Nearest(k=1))
print("k nearest neighbors (k=3):\n",distance.Nearest(k=3))
print("Gaussian naive Bayes:\n",bayes.Gaussian())
print("Linear SVM:\n",linear.SVC())
print("Linear Discriminant Analysis:\n",linear.LDA())