From bf14e47654b55bbf0e4b12229507b86b278ccd03 Mon Sep 17 00:00:00 2001
From: Filippo Casarin
Date: Sun, 3 May 2020 14:06:22 +0200
Subject: [PATCH] Added save and load functions

---
 .gitignore    |  1 +
 Makefile      |  7 ++--
 README.md     |  2 ++
 includes/nn.h | 23 ++++++++++++
 src/dnn.cpp   | 37 +++++++++++++-------
 src/nn.cpp    | 94 +++++++++++++++++++++++++++++++++++++++++++++++++--
 src/tnn.cpp   | 35 ++++++++++++++++++++
 7 files changed, 181 insertions(+), 18 deletions(-)
 create mode 100644 src/tnn.cpp

diff --git a/.gitignore b/.gitignore
index 0deb37f..f098e5c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 obj
 bin
 dataset
+nn.bin
diff --git a/Makefile b/Makefile
index 0bf0ddc..4cfaf0d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,14 @@
-# CC=g++ -std=c++17 -g -Ofast -Wall -Wextra -Iincludes -D_GLIBCXX_DEBUG -fsanitize=address
-CC=g++ -std=c++17 -Ofast -Wall -Wextra -Iincludes -DNDEBUG
+CC=g++ -std=c++17 -g -Ofast -Wall -Wextra -Iincludes -D_GLIBCXX_DEBUG -fsanitize=address
+# CC=g++ -std=c++17 -Ofast -Wall -Wextra -Iincludes -DNDEBUG
 
 DATASET=$(addprefix dataset/,t10k-labels-idx1-ubyte train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte)
-BINARY=$(addprefix bin/,k-NN dnn)
+BINARY=$(addprefix bin/,k-NN dnn tnn)
 
 all: $(BINARY) $(DATASET)
 
 bin/k-NN: $(addprefix obj/,k-NN.o dataset.o)
 bin/dnn: $(addprefix obj/,dnn.o nn.o dataset.o)
+bin/tnn: $(addprefix obj/,tnn.o nn.o dataset.o)
 
 $(BINARY): bin/% : | bin/
 	$(CC) -o $@ $^
diff --git a/README.md b/README.md
index 3a84832..6f15376 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,8 @@ Error rate: 3.87%
 
 # Convolutional Neural Network
 Error rate: 3.61%
+96.80%
+97.87%
 
 # Compilation
 ```bash
diff --git a/includes/nn.h b/includes/nn.h
index 70d8ea7..c0a4b5f 100644
--- a/includes/nn.h
+++ b/includes/nn.h
@@ -2,9 +2,16 @@
 #include <vector>
+#include <fstream>
 
 struct Layer {
+	virtual ~Layer() = default;
+
 	virtual std::vector<float> operator() (std::vector<float>&) = 0;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) = 0;
 	virtual void apply() {}
+
+	virtual void save(std::ofstream&) = 0;
+
+	static Layer* fromFile(int idx, std::ifstream& fin);
 };
 
 struct LayerLinear : Layer {
@@ -14,12 +21,17 @@
 	LayerLinear(size_t I, size_t O);
 	~LayerLinear();
 
+	LayerLinear(std::ifstream&);
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>&) override;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) override;
 	virtual void apply() override;
 };
 
 struct LayerSigmoid : Layer {
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>& m) override;
 	virtual std::vector<float> backprop(std::vector<float>& m, std::vector<float>& c, const std::vector<float>& p) override;
 };
@@ -29,6 +41,9 @@ struct LayerAveragePooling : Layer {
 	LayerAveragePooling(std::array<size_t, 2> S, std::array<size_t, 2> D) : D(D), S(S) {}
 
+	LayerAveragePooling(std::ifstream&);
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>&) override;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) override;
 };
 
@@ -41,6 +56,9 @@ struct LayerConvolutional : Layer {
 	LayerConvolutional(size_t, size_t, std::array<size_t, 2>, std::array<size_t, 2>);
 	~LayerConvolutional();
 
+	LayerConvolutional(std::ifstream&);
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>&) override;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) override;
 	virtual void apply() override;
@@ -50,6 +68,11 @@ struct NN {
 	std::vector<Layer*> layers;
 
 	NN(std::initializer_list<Layer*> il);
+	~NN();
+
+	NN(std::string path);
+	void save(std::string);
+
 	std::vector<float> operator() (std::vector<float> I);
 	void backprop(std::vector<float> I, const std::vector<float>& O);
 	void apply();
diff --git a/src/dnn.cpp b/src/dnn.cpp
index 97f271a..17b1bce 100644
--- a/src/dnn.cpp
+++ b/src/dnn.cpp
@@ -14,27 +14,35 @@ int main() {
 	srand(time(0));
 	load_dataset();
 
+	NN nn {
+		new LayerLinear(28*28, 28),
+		new LayerSigmoid,
+		new LayerLinear(28   , 28),
+		new LayerSigmoid,
+		new LayerLinear(28   , 10),
+		new LayerSigmoid,
+	};
+
 	// NN nn {
-	// 	new LayerLinear(28*28, 28),
+	// 	new LayerConvolutional(1, 20, {24, 24}, {5, 5}),
+	// 	new LayerSigmoid,
+	// 	new LayerAveragePooling({12, 12}, {2, 2}),
+
+	// 	new LayerConvolutional(20, 40, {9, 9}, {4, 4}),
 	// 	new LayerSigmoid,
-	// 	new LayerLinear(28   , 28),
+	// 	new LayerAveragePooling({3, 3}, {3, 3}),
+
+	// 	new LayerLinear(40*3*3, 150),
 	// 	new LayerSigmoid,
-	// 	new LayerLinear(28   , 10),
+	// 	new LayerLinear(150, 10),
 	// 	new LayerSigmoid,
 	// };
-
-	NN nn {
-		new LayerAveragePooling({14, 14}, {2, 2}),
-		new LayerConvolutional(1, 2, {10, 10}, {5, 5}),
-		new LayerSigmoid,
-
-		new LayerLinear(2*10*10, 10),
-		new LayerSigmoid,
-	};
 
 	vector<size_t> S(train_labels.size());
 	iota(S.begin(), S.end(), 0);
 
+	size_t M = 0;
+
 	do {
 		random_shuffle(S.begin(), S.end());
 		for (size_t i = 0; i < S.size(); i++) {
@@ -67,6 +75,11 @@ int main() {
 			C += max_element(O.begin(), O.end()) - O.begin() == test_labels[i];
 		}
 
+		if (C > M) {
+			M = C;
+			nn.save("nn.bin");
+		}
+
 		float P = 100.0f * C / test_labels.size();
 		printf("[");
 		for (size_t i = 0; i < 100; i++)
diff --git a/src/nn.cpp b/src/nn.cpp
index 21e70d9..5f71ffe 100644
--- a/src/nn.cpp
+++ b/src/nn.cpp
@@ -1,10 +1,22 @@
 #include <cmath>
 #include <cassert>
 #include <cstdlib>
+#include <fstream>
 
-#include <nn.h>
+// #include <nn.h>
+#include "../includes/nn.h"
 
 using namespace std;
 
+Layer* Layer::fromFile(int idx, ifstream& fin) {
+	switch (idx) {
+		case 0: puts("LayerLinear"); return new LayerLinear(fin);
+		case 1: puts("LayerSigmoid"); return new LayerSigmoid;
+		case 2: puts("LayerAveragePooling"); return new LayerAveragePooling(fin);
+		case 3: puts("LayerConvolutional"); return new LayerConvolutional(fin);
+		default: assert(false); return nullptr;
+	}
+}
+
 LayerLinear::LayerLinear(size_t I, size_t O) : I(I), O(O) {
 	size_t T = (I + 1) * O;
 	W = new float[T];
@@ -18,6 +30,23 @@ LayerLinear::~LayerLinear() {
 	delete[] A;
 }
 
+LayerLinear::LayerLinear(ifstream& fin) {
+	fin.read((char*)&I, sizeof(I));
+	fin.read((char*)&O, sizeof(O));
+
+	size_t T = O * (I + 1);
+	W = new float[T];
+	A = new float[T] {};
+	fin.read((char*)W, T * sizeof(*W));
+}
+
+void LayerLinear::save(ofstream& fout) {
+	fout.write("\000", 1);
+	fout.write((char*)&I, sizeof(I));
+	fout.write((char*)&O, sizeof(O));
+	fout.write((char*)W, O * (I + 1) * sizeof(*W));
+}
+
 vector<float> LayerLinear::operator() (vector<float>& m) {
 	assert(m.size() == I);
 
@@ -53,6 +82,10 @@ void LayerLinear::apply() {
 	}
 }
 
+void LayerSigmoid::save(ofstream& fout) {
+	fout.write("\001", 1);
+}
+
 vector<float> LayerSigmoid::operator() (vector<float>& m) {
 	for (size_t i = 0; i < m.size(); i++)
 		m[i] = 1 / (1 + exp(-m[i]));
@@ -68,6 +101,17 @@ vector<float> LayerSigmoid::backprop(vector<float>& m, vector<float>& c, const
 	return m;
 }
 
+LayerAveragePooling::LayerAveragePooling(ifstream& fin) {
+	fin.read((char*)&S, sizeof(S));
+	fin.read((char*)&D, sizeof(D));
+}
+
+void LayerAveragePooling::save(ofstream& fout) {
+	fout.write("\002", 1);
+	fout.write((char*)&S, sizeof(S));
+	fout.write((char*)&D, sizeof(D));
+}
+
 vector<float> LayerAveragePooling::operator() (vector<float>& m) {
 	assert(m.size() % (S[0] * D[0] * S[1] * D[1]) == 0);
 
@@ -98,8 +142,11 @@ vector<float> LayerAveragePooling::backprop(vector<float>& m, vector<float>&, const vector<float>&) {
 
 LayerConvolutional::LayerConvolutional(size_t I, size_t O, array<size_t, 2> S, array<size_t, 2> K) : I(I), O(O), S(S), K(K) {
-	W = new float[O * (I * K[0] * K[1] + 1)];
-	A = new float[O * (I * K[0] * K[1] + 1)];
+	size_t T = O * (I * K[0] * K[1] + 1);
+	W = new float[T];
+	A = new float[T] {};
+	for (size_t i = 0; i < T; i++)
+		W[i] = (float)rand() / RAND_MAX * 2 - 1;
 }
 
 LayerConvolutional::~LayerConvolutional() {
@@ -107,6 +154,27 @@ LayerConvolutional::~LayerConvolutional() {
 	delete[] A;
 }
 
+LayerConvolutional::LayerConvolutional(ifstream& fin) {
+	fin.read((char*)&I, sizeof(I));
+	fin.read((char*)&O, sizeof(O));
+	fin.read((char*)&S, sizeof(S));
+	fin.read((char*)&K, sizeof(K));
+
+	size_t T = O * (I * K[0] * K[1] + 1);
+	W = new float[T];
+	A = new float[T] {};
+	fin.read((char*)W, T * sizeof(*W));
+}
+
+void LayerConvolutional::save(ofstream& fout) {
+	fout.write("\003", 1);
+	fout.write((char*)&I, sizeof(I));
+	fout.write((char*)&O, sizeof(O));
+	fout.write((char*)&S, sizeof(S));
+	fout.write((char*)&K, sizeof(K));
+	fout.write((char*)W, O * (I * K[0] * K[1] + 1) * sizeof(*W));
+}
+
 vector<float> LayerConvolutional::operator() (vector<float>& m) {
 	assert(m.size() == I * (S[0] + K[0] - 1) * (S[1] + K[1] - 1));
 
@@ -165,6 +233,11 @@ void LayerConvolutional::apply() {
 
 NN::NN(initializer_list<Layer*> il) : layers(il) {}
 
+NN::~NN() {
+	for (Layer* l : layers)
+		delete l;
+}
+
 vector<float> NN::operator() (vector<float> I) {
 	for (Layer* l : layers)
 		I = (*l)(I);
@@ -193,3 +266,18 @@ void NN::apply() {
 	for (Layer* l : layers)
 		l->apply();
 }
+
+void NN::save(string path) {
+	ofstream fout(path, ios::binary);
+	for (Layer* l : layers)
+		l->save(fout);
+	fout.close();
+}
+
+NN::NN(string path) {
+	ifstream fin(path, ios::binary);
+	char c;
+	while (c = fin.get(), !fin.eof())
+		layers.emplace_back(Layer::fromFile(c, fin));
+	fin.close();
+}
diff --git a/src/tnn.cpp b/src/tnn.cpp
new file mode 100644
index 0000000..33df715
--- /dev/null
+++ b/src/tnn.cpp
@@ -0,0 +1,35 @@
+#include <cstdio>
+#include <cstdlib>
+
+#include <ctime>
+#include <vector>
+#include <algorithm>
+#include <numeric>
+
+#include <nn.h>
+#include <dataset.h>
+using namespace std;
+
+int main() {
+	srand(time(0));
+	load_dataset();
+
+	NN nn("nn.bin");
+
+	size_t C = 0;
+	for (size_t i = 0; i < test_labels.size(); i++) {
+		vector<float> I(28*28);
+		for (size_t j = 0; j < I.size(); j++)
+			I[j] = test_images[i][j] / 255.0f;
+
+		vector<float> O = nn(I);
+
+		C += max_element(O.begin(), O.end()) - O.begin() == test_labels[i];
+	}
+
+	float P = 100.0f * C / test_labels.size();
+	printf("[");
+	for (size_t i = 0; i < 100; i++)
+		printf("%c", i < (P - 90) * 10 ? '#' : ' ');
+	printf("] %.2f%%\n", P);
+}
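
Note on the on-disk format: NN::save writes the layers back to back; each layer contributes one tag byte (0 = LayerLinear, 1 = LayerSigmoid, 2 = LayerAveragePooling, 3 = LayerConvolutional) followed by its raw fields and weights, and NN::NN(std::string) keeps dispatching through Layer::fromFile until EOF. Fields are written in host byte order with native sizeof(size_t), so nn.bin is only portable between machines with the same layout. Below is a minimal round-trip sketch of that format, kept outside the patch; the file names roundtrip.cpp / roundtrip.bin and the compile command are illustrative, assuming the patch is applied and the build mirrors src/tnn.cpp:

	g++ -std=c++17 -Iincludes roundtrip.cpp src/nn.cpp -o roundtrip

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <algorithm>
#include <vector>

#include <nn.h>
using namespace std;

int main() {
	srand(0);

	// Any stack of layers works here; the tag byte written by each
	// layer's save() tells the loader which constructor to dispatch to.
	NN a {
		new LayerLinear(28*28, 28),
		new LayerSigmoid,
		new LayerLinear(28, 10),
		new LayerSigmoid,
	};
	a.save("roundtrip.bin");   // illustrative file name

	NN b("roundtrip.bin");     // prints one layer name per tag byte read

	// Feed the same random input through both networks: the weights were
	// copied bit-for-bit, so the outputs should be identical.
	vector<float> I(28*28);
	for (float& x : I)
		x = (float)rand() / RAND_MAX;

	vector<float> Oa = a(I);
	vector<float> Ob = b(I);

	float e = 0;
	for (size_t i = 0; i < Oa.size(); i++)
		e = max(e, fabsf(Oa[i] - Ob[i]));
	printf("max |Oa[i] - Ob[i]| = %g\n", e);   // expect exactly 0
}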