From bf14e47654b55bbf0e4b12229507b86b278ccd03 Mon Sep 17 00:00:00 2001
From: Filippo Casarin
Date: Sun, 3 May 2020 14:06:22 +0200
Subject: [PATCH] Added save and load functions

---
 .gitignore    |  1 +
 Makefile      |  7 ++--
 README.md     |  2 ++
 includes/nn.h | 23 ++++++++++++
 src/dnn.cpp   | 37 +++++++++++++-------
 src/nn.cpp    | 94 +++++++++++++++++++++++++++++++++++++++++++++++++--
 src/tnn.cpp   | 35 ++++++++++++++++++++
 7 files changed, 181 insertions(+), 18 deletions(-)
 create mode 100644 src/tnn.cpp

diff --git a/.gitignore b/.gitignore
index 0deb37f..f098e5c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 obj
 bin
 dataset
+nn.bin
diff --git a/Makefile b/Makefile
index 0bf0ddc..4cfaf0d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,14 @@
-# CC=g++ -std=c++17 -g -Ofast -Wall -Wextra -Iincludes -D_GLIBCXX_DEBUG -fsanitize=address
-CC=g++ -std=c++17 -Ofast -Wall -Wextra -Iincludes -DNDEBUG
+CC=g++ -std=c++17 -g -Ofast -Wall -Wextra -Iincludes -D_GLIBCXX_DEBUG -fsanitize=address
+# CC=g++ -std=c++17 -Ofast -Wall -Wextra -Iincludes -DNDEBUG
 
 DATASET=$(addprefix dataset/,t10k-labels-idx1-ubyte train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte)
-BINARY=$(addprefix bin/,k-NN dnn)
+BINARY=$(addprefix bin/,k-NN dnn tnn)
 
 all: $(BINARY) $(DATASET)
 
 bin/k-NN: $(addprefix obj/,k-NN.o dataset.o)
 bin/dnn: $(addprefix obj/,dnn.o nn.o dataset.o)
+bin/tnn: $(addprefix obj/,tnn.o nn.o dataset.o)
 
 $(BINARY): bin/% : | bin/
 	$(CC) -o $@ $^
diff --git a/README.md b/README.md
index 3a84832..6f15376 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,8 @@ Error rate: 3.87%
 
 # Convolutional Neural Network
 Error rate: 3.61%
+96.80%
+97.87%
 
 # Compilation
 ```bash
diff --git a/includes/nn.h b/includes/nn.h
index 70d8ea7..c0a4b5f 100644
--- a/includes/nn.h
+++ b/includes/nn.h
@@ -2,9 +2,16 @@
 #include <vector>
+#include <fstream>
 
 struct Layer {
+	virtual ~Layer() = default;
+
 	virtual std::vector<float> operator() (std::vector<float>&) = 0;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) = 0;
 	virtual void apply() {}
+
+	virtual void save(std::ofstream&) = 0;
+
+	static Layer* fromFile(int idx, std::ifstream& fin);
 };
 
 struct LayerLinear : Layer {
@@ -14,12 +21,17 @@
 	LayerLinear(size_t I, size_t O);
 	~LayerLinear();
 
+	LayerLinear(std::ifstream&);
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>&) override;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) override;
 	virtual void apply() override;
 };
 
 struct LayerSigmoid : Layer {
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>& m) override;
 	virtual std::vector<float> backprop(std::vector<float>& m, std::vector<float>& c, const std::vector<float>& p) override;
 };
@@ -29,6 +41,9 @@ struct LayerAveragePooling : Layer {
 	LayerAveragePooling(std::array<size_t, 2> S, std::array<size_t, 2> D) : D(D), S(S) {}
 
+	LayerAveragePooling(std::ifstream&);
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>&) override;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) override;
 };
 
@@ -41,6 +56,9 @@ struct LayerConvolutional : Layer {
 	LayerConvolutional(size_t, size_t, std::array<size_t, 2>, std::array<size_t, 2>);
 	~LayerConvolutional();
 
+	LayerConvolutional(std::ifstream&);
+	virtual void save(std::ofstream&) override;
+
 	virtual std::vector<float> operator() (std::vector<float>&) override;
 	virtual std::vector<float> backprop(std::vector<float>&, std::vector<float>&, const std::vector<float>&) override;
 	virtual void apply() override;
@@ -50,6 +68,11 @@ struct NN {
 	std::vector<Layer*> layers;
 
 	NN(std::initializer_list<Layer*> il);
+	~NN();
+
+	NN(std::string path);
+	void save(std::string);
+
 	std::vector<float> operator() (std::vector<float> I);
 	void backprop(std::vector<float> I, const std::vector<float>& O);
 	void apply();
diff --git a/src/dnn.cpp b/src/dnn.cpp
index 97f271a..17b1bce 100644
--- a/src/dnn.cpp
+++ b/src/dnn.cpp
@@ -14,27 +14,35 @@ int main() {
 	srand(time(0));
 	load_dataset();
 
+	NN nn {
+		new LayerLinear(28*28, 28),
+		new LayerSigmoid,
+		new LayerLinear(28   , 28),
+		new LayerSigmoid,
+		new LayerLinear(28   , 10),
+		new LayerSigmoid,
+	};
+
 	// NN nn {
-	// 	new LayerLinear(28*28, 28),
+	// 	new LayerConvolutional(1, 20, {24, 24}, {5, 5}),
+	// 	new LayerSigmoid,
+	// 	new LayerAveragePooling({12, 12}, {2, 2}),
+
+	// 	new LayerConvolutional(20, 40, {9, 9}, {4, 4}),
 	// 	new LayerSigmoid,
-	// 	new LayerLinear(28   , 28),
+	// 	new LayerAveragePooling({3, 3}, {3, 3}),
+
+	// 	new LayerLinear(40*3*3, 150),
 	// 	new LayerSigmoid,
-	// 	new LayerLinear(28   , 10),
+	// 	new LayerLinear(150, 10),
 	// 	new LayerSigmoid,
 	// };
-
-	NN nn {
-		new LayerAveragePooling({14, 14}, {2, 2}),
-		new LayerConvolutional(1, 2, {10, 10}, {5, 5}),
-		new LayerSigmoid,
-
-		new LayerLinear(2*10*10, 10),
-		new LayerSigmoid,
-	};
 
 	vector<size_t> S(train_labels.size());
 	iota(S.begin(), S.end(), 0);
 
+	size_t M = 0;
+
 	do {
 		random_shuffle(S.begin(), S.end());
 		for (size_t i = 0; i < S.size(); i++) {
@@ -67,6 +75,11 @@ int main() {
 			C += max_element(O.begin(), O.end()) - O.begin() == test_labels[i];
 		}
 
+		if (C > M) {
+			M = C;
+			nn.save("nn.bin");
+		}
+
 		float P = 100.0f * C / test_labels.size();
 		printf("[");
 		for (size_t i = 0; i < 100; i++)
diff --git a/src/nn.cpp b/src/nn.cpp
index 21e70d9..5f71ffe 100644
--- a/src/nn.cpp
+++ b/src/nn.cpp
@@ -1,10 +1,22 @@
 #include <cmath>
 #include <cassert>
 #include <cstdlib>
+#include <fstream>
 
-#include <nn.h>
+// #include <nn.h>
+#include "../includes/nn.h"
 
 using namespace std;
 
+Layer* Layer::fromFile(int idx, ifstream& fin) {
+	switch (idx) {
+		case 0: puts("LayerLinear"); return new LayerLinear(fin);
+		case 1: puts("LayerSigmoid"); return new LayerSigmoid;
+		case 2: puts("LayerAveragePooling"); return new LayerAveragePooling(fin);
+		case 3: puts("LayerConvolutional"); return new LayerConvolutional(fin);
+		default: assert(false); return nullptr;
+	}
+}
+
 LayerLinear::LayerLinear(size_t I, size_t O) : I(I), O(O) {
 	size_t T = (I + 1) * O;
 	W = new float[T];
@@ -18,6 +30,23 @@ LayerLinear::~LayerLinear() {
 	delete[] A;
 }
 
+LayerLinear::LayerLinear(ifstream& fin) {
+	fin.read((char*)&I, sizeof(I));
+	fin.read((char*)&O, sizeof(O));
+
+	size_t T = O * (I + 1);
+	W = new float[T];
+	A = new float[T] {};
+	fin.read((char*)W, T * sizeof(*W));
+}
+
+void LayerLinear::save(ofstream& fout) {
+	fout.write("\000", 1);
+	fout.write((char*)&I, sizeof(I));
+	fout.write((char*)&O, sizeof(O));
+	fout.write((char*)W, O * (I + 1) * sizeof(*W));
+}
+
 vector<float> LayerLinear::operator() (vector<float>& m) {
 	assert(m.size() == I);
 
@@ -53,6 +82,10 @@ void LayerLinear::apply() {
 	}
 }
 
+void LayerSigmoid::save(ofstream& fout) {
+	fout.write("\001", 1);
+}
+
 vector<float> LayerSigmoid::operator() (vector<float>& m) {
 	for (size_t i = 0; i < m.size(); i++)
 		m[i] = 1 / (1 + exp(-m[i]));
@@ -68,6 +101,17 @@ vector<float> LayerSigmoid::backprop(vector<float>& m, vector<float>& c, const
 	return m;
 }
 
+LayerAveragePooling::LayerAveragePooling(ifstream& fin) {
+	fin.read((char*)&S, sizeof(S));
+	fin.read((char*)&D, sizeof(D));
+}
+
+void LayerAveragePooling::save(ofstream& fout) {
+	fout.write("\002", 1);
+	fout.write((char*)&S, sizeof(S));
+	fout.write((char*)&D, sizeof(D));
+}
+
 vector<float> LayerAveragePooling::operator() (vector<float>& m) {
 	assert(m.size() % (S[0] * D[0] * S[1] * D[1]) == 0);
 
@@ -98,8 +142,11 @@ vector<float> LayerAveragePooling::backprop(vector<float>& m, vector<float>&, const vector<float>&) {
 
 LayerConvolutional::LayerConvolutional(size_t I, size_t O, array<size_t, 2> S, array<size_t, 2> K) : I(I), O(O), S(S), K(K) {
-	W = new float[O * (I * K[0] * K[1] + 1)];
-	A = new float[O * (I * K[0] * K[1] + 1)];
+	size_t T = O * (I * K[0] * K[1] + 1);
+	W = new float[T];
+	A = new float[T] {};
+	for (size_t i = 0; i < T; i++)
+		W[i] = (float)rand() / RAND_MAX * 2 - 1;
 }
 
 LayerConvolutional::~LayerConvolutional() {
@@ -107,6 +154,27 @@ LayerConvolutional::~LayerConvolutional() {
 	delete[] A;
 }
 
+LayerConvolutional::LayerConvolutional(ifstream& fin) {
+	fin.read((char*)&I, sizeof(I));
+	fin.read((char*)&O, sizeof(O));
+	fin.read((char*)&S, sizeof(S));
+	fin.read((char*)&K, sizeof(K));
+
+	size_t T = O * (I * K[0] * K[1] + 1);
+	W = new float[T];
+	A = new float[T] {};
+	fin.read((char*)W, T * sizeof(*W));
+}
+
+void LayerConvolutional::save(ofstream& fout) {
+	fout.write("\003", 1);
+	fout.write((char*)&I, sizeof(I));
+	fout.write((char*)&O, sizeof(O));
+	fout.write((char*)&S, sizeof(S));
+	fout.write((char*)&K, sizeof(K));
+	fout.write((char*)W, O * (I * K[0] * K[1] + 1) * sizeof(*W));
+}
+
 vector<float> LayerConvolutional::operator() (vector<float>& m) {
 	assert(m.size() == I * (S[0] + K[0] - 1) * (S[1] + K[1] - 1));
 
@@ -165,6 +233,11 @@ void LayerConvolutional::apply() {
 
 NN::NN(initializer_list<Layer*> il) : layers(il) {}
 
+NN::~NN() {
+	for (Layer* l : layers)
+		delete l;
+}
+
 vector<float> NN::operator() (vector<float> I) {
 	for (Layer* l : layers)
 		I = (*l)(I);
@@ -193,3 +266,18 @@ void NN::apply() {
 	for (Layer* l : layers)
 		l->apply();
 }
+
+void NN::save(string path) {
+	ofstream fout(path, ios::binary);
+	for (Layer* l : layers)
+		l->save(fout);
+	fout.close();
+}
+
+NN::NN(string path) {
+	ifstream fin(path, ios::binary);
+	char c;
+	while (c = fin.get(), !fin.eof())
+		layers.emplace_back(Layer::fromFile(c, fin));
+	fin.close();
+}
diff --git a/src/tnn.cpp b/src/tnn.cpp
new file mode 100644
index 0000000..33df715
--- /dev/null
+++ b/src/tnn.cpp
@@ -0,0 +1,35 @@
+#include <cstdio>
+#include <cstdlib>
+
+#include <ctime>
+#include <vector>
+#include <algorithm>
+#include <numeric>
+
+#include <nn.h>
+#include <dataset.h>
+using namespace std;
+
+int main() {
+	srand(time(0));
+	load_dataset();
+
+	NN nn("nn.bin");
+
+	size_t C = 0;
+	for (size_t i = 0; i < test_labels.size(); i++) {
+		vector<float> I(28*28);
+		for (size_t j = 0; j < I.size(); j++)
+			I[j] = test_images[i][j] / 255.0f;
+
+		vector<float> O = nn(I);
+
+		C += max_element(O.begin(), O.end()) - O.begin() == test_labels[i];
+	}
+
+	float P = 100.0f * C / test_labels.size();
+	printf("[");
+	for (size_t i = 0; i < 100; i++)
+		printf("%c", i < (P - 90) * 10 ? '#' : ' ');
+	printf("] %.2f%%\n", P);
+}
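
Note on the on-disk format: NN::save writes the layers back to back; each layer contributes one tag byte (0 = LayerLinear, 1 = LayerSigmoid, 2 = LayerAveragePooling, 3 = LayerConvolutional) followed by its raw fields and weights, and NN::NN(std::string) keeps dispatching through Layer::fromFile until EOF. Fields are written in host byte order with native sizeof(size_t), so nn.bin is only portable between machines with the same layout. Below is a minimal round-trip sketch of that format, kept outside the patch; the file names roundtrip.cpp / roundtrip.bin and the compile command are illustrative, assuming the patch is applied and the build mirrors src/tnn.cpp:

	g++ -std=c++17 -Iincludes roundtrip.cpp src/nn.cpp -o roundtrip

#include <cstdio>
#include <cstdlib>
#include <cmath>
#include <algorithm>
#include <vector>

#include <nn.h>
using namespace std;

int main() {
	srand(0);

	// Any stack of layers works here; the tag byte written by each
	// layer's save() tells the loader which constructor to dispatch to.
	NN a {
		new LayerLinear(28*28, 28),
		new LayerSigmoid,
		new LayerLinear(28, 10),
		new LayerSigmoid,
	};
	a.save("roundtrip.bin");   // illustrative file name

	NN b("roundtrip.bin");     // prints one layer name per tag byte read

	// Feed the same random input through both networks: the weights were
	// copied bit-for-bit, so the outputs should be identical.
	vector<float> I(28*28);
	for (float& x : I)
		x = (float)rand() / RAND_MAX;

	vector<float> Oa = a(I);
	vector<float> Ob = b(I);

	float e = 0;
	for (size_t i = 0; i < Oa.size(); i++)
		e = max(e, fabsf(Oa[i] - Ob[i]));
	printf("max |Oa[i] - Ob[i]| = %g\n", e);   // expect exactly 0
}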