% Copyright (c) 2014 Boris Knyazev % Permission is hereby granted, free of charge, to any person obtaining % a copy of this software and associated documentation files (the % "Software"), to deal in the Software without restriction, including % without limitation the rights to use, copy, modify, merge, publish, % distribute, sublicense, and/or sell copies of the Software, and to % permit persons to whom the Software is furnished to do so, subject to % the following conditions: % % The above copyright notice and this permission notice shall be % included in all copies or substantial portions of the Software. % % THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, % EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF % MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND % NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE % LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION % OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION % WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. % RU % Данная функция реализует алгоритм классификации изображений рукописных % цифр базы MNIST (см. LeCun Y. [et al.], 1998, а также сайт http://yann.lecun.com/exdb/mnist/) % на основе фильтров Габора (см. D. Gabor, 1946, Daugman J. G., 1985), % локальных операторов минимума-максимума, используемых в (см. Labusch K., Barth E., Martinetz T. % Simple method for high-performance digit recognition based on sparse coding, 2008), % метода главных компонент (PCA) и машины опорных векторов (SVM) (Cortes C., Vapnik V., 1995). % Вклад данной работы заключается в разработке нового набора фильтров Габора % в количестве 198 штук, в выборе оптимальных параметров метода PCA и SVM. % Для запуска скрипта необходима библиотека libsvm (http://www.csie.ntu.edu.tw/~cjlin/libsvm/). % Для запуска скрипта автор использовал версию 3.18, версия MATLAB 2013b. % Также необходимо скачать и распаковать в папку данного скрипта файлы MNIST с сайта http://yann.lecun.com/exdb/mnist/. % Вопросы по данному скрипту могут быть направлены мне, автору кода, Борису Князеву, bknyazev at bmstu ru % EN % This function implements an algorithm for handwritten digits classification % taken from the MNIST set (see LeCun Y. [et al.], 1998, and http://yann.lecun.com/exdb/mnist/). % This algorithm is based on Gabor filters (see D. Gabor, 1946, Daugman J. G., 1985), % local minimum maximum operators used in Labusch K., Barth E., Martinetz T. % Simple method for high-performance digit recognition based on sparse coding, 2008, % principal component analysis and the support vector machine (Cortes C., Vapnik V., 1995). % The contribution of our work is development of a new bank of Gabor filters, 198 filters in total, % finding optimal parameters for the PCA and SVM methods. % To run this script the libsvm library (http://www.csie.ntu.edu.tw/~cjlin/libsvm/) must be set up. % To run this code the author used version 3.18 of it and MATLAB 2013b. % You also have to have MNIST datasets from http://yann.lecun.com/exdb/mnist/ unpacked and placed in the directory of this script. % For any questions, please, contact me, the author of this script, Boris Knyazev, bknyazev at bmstu ru function [accuracies_inst] = classifyDigits() addpath('C:\Project\Private\Asp\Articles\HierarchicalModel\Code\libsvm-3.18\matlab'); addpath('C:\Project\CVML\Sources\MNIST'); % загружаем тренировочные и тестовые данные, скачанные с сайта MNIST training_label_vector = loadMNISTLabels('train-labels.idx1-ubyte'); testing_label_vector = loadMNISTLabels('t10k-labels.idx1-ubyte'); trainingImages = loadMNISTImages('train-images.idx3-ubyte'); testingImages = loadMNISTImages('t10k-images.idx3-ubyte'); % ------------------------------------------------------------------------- imSize = repmat(sqrt(size(testingImages,1)),1,2); % Будем запускать 8 тестов для различного количества тренировочных изображений % Весь процесс может занять много времени (~10 часов), поэтому можно % запустить тест только для полной выборки (60*10^3 изображений) m_insts = 60*10^3; % [0.3,1,2,5,10,20,40,60].*10^3; accuracies_inst = zeros(length(m_insts),5); m_inst_test = size(testingImages,2); load('gaborBank198.mat'); % загружаем Фурье-образы разработанных фильтров Габора % Оптимальныен параметры разбиения изображения на области, тестировались значения 3-7 Mx = 4; My = 6; dim = 325; % оптимальное количество главных компонент (найденное с шагом 25) % Оптимальные параметры SVM, найденные с шагом 2^0.25 best_C = 2^2.75; best_gamma = 2^(-10); step = 10000; % шаг обработки данных, требуемый для избежания ошибки out of memory % Загружаем матрицу преобразования PCA и вектор средних значений, % полученную на основе анализа первых 10000 изображений тренировочной % выборки load('PCA_transformation_matrix.mat'); load('PCA_data_mean.mat'); % Формируем тестируемую выборку, полученную проекцией изображений на % размерности PCA [~, testing_instance_matrix] = createSetGaborPCA(imSize, testing_label_vector, testingImages, m_inst_test, gaborBank, 1, Mx, My); [testing_instance_matrix, ~, ~] = createSetPCA(testing_instance_matrix(1:m_inst_test,:), transformation_matrix, data_mean, dim); for i=1:length(m_insts) tic; if (m_insts(i) > step) training_instance_matrix = zeros(m_insts(i),dim); N_steps = m_insts(i)/step; for j=1:N_steps try % Формируем j-ую часть тренировочной выборки, полученной проекцией изображений на % размерности PCA [~, training_instance_matrix_tmp] = createSetGaborPCA(imSize, training_label_vector, ... trainingImages(:,(j-1)*step + 1:j*step), step, gaborBank, 1, Mx, My); [training_instance_matrix_tmp, ~, ~] = createSetPCA(training_instance_matrix_tmp, ... transformation_matrix, data_mean, dim); training_instance_matrix((j-1)*step + 1:j*step,:) = training_instance_matrix_tmp; catch warning('exception'); end clear training_instance_matrix_tmp; end else [~, training_instance_matrix_tmp] = createSetGaborPCA(imSize, training_label_vector, ... trainingImages(:,1:m_insts(i)), m_insts(i), gaborBank, 1, Mx, My); [training_instance_matrix, transformation_matrix, data_mean] = createSetPCA(training_instance_matrix_tmp, ... double.empty(0,1), double.empty(0,1), dim); clear training_instance_matrix_tmp; end time3 = toc; tic; time4 = toc; tic; % Обучаем модель используя libsvm model = svmtrain(training_label_vector(1:m_insts(i)), training_instance_matrix(1:m_insts(i),:), sprintf('-t 2 -c %f -g %f', best_C, best_gamma)); time1 = toc; tic; % Определяем классы тестируемых изображений используя libsvm [predicted_label, accuracy, ~] = svmpredict(testing_label_vector(1:m_inst_test), testing_instance_matrix, model); time2 = toc; accuracies_inst(i,:) = [accuracy(1), time1, time2, time3, time4]; clear training_instance_matrix; end clear training_instance_matrix; clear testing_instance_matrix; clear transformation_matrix; end % ------------------------------------------------------------------------- function [label_vector, instance_matrix] = createSetGaborPCA(imSize, labels, images, m_inst, gaborBank, first, Mx, My) label_vector = zeros(m_inst,1); GaborSize = numel(gaborBank); resSize = [Mx*My,2]; instance_matrix = zeros(m_inst,resSize(1)*resSize(2)*GaborSize); offset_x = 1; offset_y = 3; for i=first:min(m_inst,length(labels)) I = im2double(reshape(images(:,i), imSize)); X = fft2(I, size(I,1)*2-1, size(I,2)*2-1); X = fft2(standardizing(ifft2(X),false)); v = zeros(resSize(1)*resSize(2),numel(gaborBank)); for j=1:numel(gaborBank) c = abs(ifft2(X.*gaborBank{j})); c = downsampleConv(c, 2, 'space'); c = c(offset_y:imSize(1)-2,offset_x:imSize(2)); c = localMaxMin(c, Mx, My); % operator introduced in a paper "Simple method for high performance digit recognition" v(:,j) = c(:); end instance_matrix(i,:) = standardizing(v(:),false); label_vector(i) = labels(i); end end % ------------------------------------------------------------------------- function [instance_matrix, transformation_matrix, data_mean] = createSetPCA(subset, transformation_matrix, data_mean_new, dim) if (isempty(data_mean_new)) data_mean = mean(subset); else data_mean = data_mean_new; end [a,b] = size(subset); for i=1:a subset(i,:) = subset(i,:) - data_mean; end if (isempty(transformation_matrix)) cov_data = cov(subset); [evectors, ~, ~] = pcacov(cov_data); transformation_matrix = evectors(:,1:dim); end instance_matrix = subset*transformation_matrix; % projected data for i=1:size(instance_matrix,1) instance_matrix(i,:) = standardizing(instance_matrix(i,:),false); end end % ------------------------------------------------------------------------- function I = standardizing(I, issparse) if (issparse) er = 0; j = find(abs(I) > er); m = mean(I(j)); I(j) = I(j) - m; else I = I - mean(I(:)); end s = std(I(:)); if (s > 0) I = I/s; end end % ------------------------------------------------------------------------- function f = downsampleConv(f, coef, type) if (isequal(lower(type),'freq')) f = fftshift(f); c_y = round((size(f,1) + 1)/2); c_x = round((size(f,2) + 1)/2); d_y = round((c_y - 1)/coef); d_x = round((c_x - 1)/coef); f = f(c_y-d_y:c_y+d_y, c_x-d_x:c_x+d_x); f = ifftshift(f); elseif (isequal(lower(type),'space')) c_y = round((size(f,1) + 1)/2); c_x = round((size(f,2) + 1)/2); d_y = round((c_y - 1)/coef); d_x = round((c_x - 1)/coef); f = f(c_y-d_y:c_y+d_y, c_x-d_x:c_x+d_x); else error('type is invalid'); end end % ------------------------------------------------------------------------- function I_loc = localMaxMin(I, Mx, My) [s1,s2] = size(I); I_loc = zeros(Mx*My,2); for i=1:My for j=1:Mx roi = I((i-1)*s1/My+1:i*s1/My, (j-1)*s2/Mx+1:j*s2/Mx); I_loc((i-1)*Mx+j,1:2) = [min(roi(:)), max(roi(:))]'; end end end % ------------------------------------------------------------------------- % Функции loadMNISTLabels и loadMNISTImages взяты из интернета function labels = loadMNISTLabels(filename) %loadMNISTLabels returns a [number of MNIST images]x1 matrix containing %the labels for the MNIST images fp = fopen(filename, 'rb'); assert(fp ~= -1, ['Could not open ', filename, '']); magic = fread(fp, 1, 'int32', 0, 'ieee-be'); assert(magic == 2049, ['Bad magic number in ', filename, '']); numLabels = fread(fp, 1, 'int32', 0, 'ieee-be'); labels = fread(fp, inf, 'unsigned char'); assert(size(labels,1) == numLabels, 'Mismatch in label count'); fclose(fp); end function images = loadMNISTImages(filename) %loadMNISTImages returns a 28x28x[number of MNIST images] matrix containing %the raw MNIST images fp = fopen(filename, 'rb'); assert(fp ~= -1, ['Could not open ', filename, '']); magic = fread(fp, 1, 'int32', 0, 'ieee-be'); assert(magic == 2051, ['Bad magic number in ', filename, '']); numImages = fread(fp, 1, 'int32', 0, 'ieee-be'); numRows = fread(fp, 1, 'int32', 0, 'ieee-be'); numCols = fread(fp, 1, 'int32', 0, 'ieee-be'); images = fread(fp, inf, 'unsigned char'); images = reshape(images, numCols, numRows, numImages); images = permute(images,[2 1 3]); fclose(fp); % Reshape to #pixels x #examples images = reshape(images, size(images, 1) * size(images, 2), size(images, 3)); % Convert to double and rescale to [0,1] images = double(images) / 255; end % -------------------------------------------------------------------------