Added Conv2d & Maxpool Layers #40

Open

wants to merge 1 commit into main
8 changes: 6 additions & 2 deletions src/index.ts
@@ -41,7 +41,9 @@ import {
CrossEntropyLoss,
MSELoss,
save,
load
load,
Conv2D,
MaxPool2D
} from "./layers.js";
import { Adam } from "./optim.js";
import { getShape } from "./utils.js";
@@ -59,7 +61,9 @@ const nn = {
Dropout,
LayerNorm,
CrossEntropyLoss,
MSELoss
MSELoss,
Conv2D,
MaxPool2D
};

const optim = { Adam };
74 changes: 73 additions & 1 deletion src/layers.ts
@@ -574,6 +574,78 @@ export class MSELoss extends Module {
}
}


export class Conv2D extends Module {
public W: Tensor;
public b: Tensor | null;
public has_bias: boolean;
public stride: [number, number];
public padding: [number, number];
public dilation: [number, number];
public groups: number;

constructor(
in_channels: number, out_channels: number, kernel_size: number | [number, number],
stride: number | [number, number] = 1, padding: number | [number, number] | "same" = "same",
dilation: number | [number, number] = 1, groups = 1, bias = true, device = "cpu"
) {
super();

const [kh, kw] = Array.isArray(kernel_size) ? kernel_size : [kernel_size, kernel_size];
const [sh, sw] = Array.isArray(stride) ? stride : [stride, stride];
const [dh, dw] = Array.isArray(dilation) ? dilation : [dilation, dilation];

let ph: number, pw: number;
if (padding === "same") {
// "same": output spatial size matches the input for stride 1 (odd effective kernel sizes)
ph = Math.floor(((kh - 1) * dh + 1 - sh) / 2);
pw = Math.floor(((kw - 1) * dw + 1 - sw) / 2);
} else if (Array.isArray(padding)) {
[ph, pw] = padding;
} else {
ph = pw = padding;
}

const weight_shape = [out_channels, Math.floor(in_channels / groups), kh, kw];
this.W = randn(weight_shape, true, device, false);
this.b = bias ? zeros([out_channels], true) : null;
this.has_bias = bias;

this.stride = [sh, sw];
this.padding = [ph, pw];
this.dilation = [dh, dw];
this.groups = groups;
}

forward(x: Tensor): Tensor {
const [kernel_height, kernel_width] = [this.W.shape[2], this.W.shape[3]];
const [batch, out_channels] = [x.shape[0], this.W.shape[0]];
const out_height = Math.floor((x.shape[2] + 2 * this.padding[0] - kernel_height) / this.stride[0]) + 1;
const out_width = Math.floor((x.shape[3] + 2 * this.padding[1] - kernel_width) / this.stride[1]) + 1;


x = x.img2col(kernel_height, kernel_width, this.stride, this.padding);

let reshaped_weights = this.W.reshape([this.W.shape[0], this.W.shape[1] * kernel_height * kernel_width]).transpose(0, 1);

x = x.matmul(reshaped_weights);

x = x.reshape([batch, out_channels, out_height, out_width]);

if (this.has_bias && this.b) {
// TODO: verify bias broadcasting; b has shape [out_channels] and likely needs a
// reshape to [1, out_channels, 1, 1] before the add so it maps onto the channel axis
x = x.add(this.b);
}

return x;
}
}

export class MaxPool2D extends Module {
public kernel_size: [number, number];
public stride: [number, number];

constructor(kernel_size: number | [number, number], stride?: number | [number, number]) {
super();
this.kernel_size = Array.isArray(kernel_size) ? kernel_size : [kernel_size, kernel_size];
this.stride = stride ? (Array.isArray(stride) ? stride : [stride, stride]) : this.kernel_size;
}

forward(x: Tensor): Tensor {
return x.maxpool(this.kernel_size, this.stride);
}
}

/**
* Saves the model to a JSON file.
* @param {Module} model - Model to be saved in JSON file.
Expand Down Expand Up @@ -629,4 +701,4 @@ function loadParameters(source: Module, target: Module) {

}
}
}
}
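
A minimal usage sketch of the two new layers, for reference while reviewing (assumptions: the nn namespace exported from src/index.ts and the randn helper used in layers.ts; the shape comments follow the output-size formulas in this PR):

const conv = new nn.Conv2D(3, 16, 3);   // 3 -> 16 channels, 3x3 kernel, "same" padding
const pool = new nn.MaxPool2D(2);       // 2x2 window, stride defaults to the kernel size
let x = randn([8, 3, 32, 32], false);   // [batch, channels, height, width]
x = conv.forward(x);                    // [8, 16, 32, 32] with "same" padding and stride 1
x = pool.forward(x);                    // [8, 16, 16, 16]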
217 changes: 217 additions & 0 deletions src/tensor.ts
@@ -418,6 +418,16 @@ export class Tensor {
const operation = new Reshape();
return operation.forward(this, shape);
}

img2col(kernel_height: number, kernel_width: number, stride: [number, number], padding: [number, number]): Tensor {
const operation = new Img2Col();
return operation.forward(this, kernel_height, kernel_width, stride, padding);
}

maxpool(kernel_size: [number, number], stride: [number, number]): Tensor {
const operation = new MaxPool();
return operation.forward(this, kernel_size, stride);
}
}

// <<< Parameter class, tensor that always tracks gradients >>> //
@@ -1237,6 +1247,213 @@ export class Reshape {
}
}

export class MaxPool {
cache: any;

forward(a: Tensor, kernel_size: [number, number], stride: [number, number]): Tensor {
const [batch, channels, height, width] = a.shape;
const [kh, kw] = kernel_size;
const [sh, sw] = stride;

const out_height = Math.floor((height - kh) / sh + 1);
const out_width = Math.floor((width - kw) / sw + 1);
const outputData = new Array(batch).fill(0).map(() =>
new Array(channels).fill(0).map(() =>
new Array(out_height).fill(0).map(() => new Array(out_width).fill(0))
)
);

// Store max indices for backpropagation
const maxIndices = new Array(batch).fill(0).map(() =>
new Array(channels).fill(0).map(() =>
new Array(out_height).fill(0).map(() => new Array(out_width).fill([0, 0]))
)
);

// Perform max pooling operation using plain arrays
for (let b = 0; b < batch; b++) {
for (let c = 0; c < channels; c++) {
for (let i = 0; i < out_height; i++) {
for (let j = 0; j < out_width; j++) {
const h_start = i * sh;
const w_start = j * sw;
const h_end = h_start + kh;
const w_end = w_start + kw;

// Extract the region to pool
let max_val = -Infinity;
let max_idx = [0, 0];
for (let ki = h_start; ki < h_end; ki++) {
for (let kj = w_start; kj < w_end; kj++) {
if (ki >= 0 && ki < height && kj >= 0 && kj < width) {
const val = a.data[b][c][ki][kj];
if (val > max_val) {
max_val = val;
max_idx = [ki - h_start, kj - w_start]; // Store relative indices
}
}
}
}

outputData[b][c][i][j] = max_val;
maxIndices[b][c][i][j] = max_idx; // Store indices relative to the window
}
}
}
}

// Create output tensor
this.cache = { x: a, maxIndices, kernel_size, stride };

const z = new Tensor(outputData, requiresGrad(a));
if (a instanceof Tensor && requiresGrad(a)) {
z.parents.push(a);
a.children.push(z);
}

z.operation = this;

return z;
}

backward(dz: Tensor, z: Tensor) {
const { x, maxIndices, kernel_size, stride } = this.cache;
const [kh, kw] = kernel_size;
const [sh, sw] = stride;
const [batch, channels, out_height, out_width] = dz.shape;

// Initialize gradient tensor for input
const dx = new Array(batch).fill(0).map(() =>
new Array(channels).fill(0).map(() =>
new Array(x.shape[2]).fill(0).map(() => new Array(x.shape[3]).fill(0))
)
);

// Propagate gradients based on stored max indices
for (let b = 0; b < batch; b++) {
for (let c = 0; c < channels; c++) {
for (let i = 0; i < out_height; i++) {
for (let j = 0; j < out_width; j++) {
const [h_idx, w_idx] = maxIndices[b][c][i][j];
const h_start = i * sh;
const w_start = j * sw;

// Assign gradient to the max index position
dx[b][c][h_start + h_idx][w_start + w_idx] += dz.data[b][c][i][j];
}
}
}
}

// Use the `backward()` call to propagate gradients further
if (x.requires_grad) {
const dxTensor = new Tensor(dx);
x.backward(dxTensor, z);
}
}

}
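
A quick shape check for the pooling loop above (a sketch; the nested-array Tensor constructor is used the same way as elsewhere in this diff): a 1x1x4x4 input with a 2x2 kernel and stride 2 gives floor((4 - 2) / 2) + 1 = 2 along each spatial axis.

const t = new Tensor([[[[ 1,  2,  3,  4],
                        [ 5,  6,  7,  8],
                        [ 9, 10, 11, 12],
                        [13, 14, 15, 16]]]], false);
const p = t.maxpool([2, 2], [2, 2]);   // p.data -> [[[[6, 8], [14, 16]]]]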

export class Img2Col {
cache: any;

forward(a: Tensor, kernel_height: number, kernel_width: number, stride: [number, number], padding: [number, number]): Tensor {
this.cache = [a, kernel_height, kernel_width, stride, padding]; // Cache all relevant data

const [batch, channels, height, width] = a.shape;
const out_height = Math.floor((height + 2 * padding[0] - kernel_height) / stride[0]) + 1;
const out_width = Math.floor((width + 2 * padding[1] - kernel_width) / stride[1]) + 1;

const col_data = [];


for (let b = 0; b < batch; b++) {
for (let i = 0; i < out_height; i++) {
for (let j = 0; j < out_width; j++) {

const patch = [];
for (let c = 0; c < channels; c++) {
for (let kh = 0; kh < kernel_height; kh++) {
for (let kw = 0; kw < kernel_width; kw++) {

const h_idx = i * stride[0] - padding[0] + kh;
const w_idx = j * stride[1] - padding[1] + kw;
if (h_idx >= 0 && h_idx < height && w_idx >= 0 && w_idx < width) {
patch.push(a.data[b][c][h_idx][w_idx]);
} else {
patch.push(0); // Zero-padding
}
}
}
}
col_data.push(patch);
}
}
}

const z = new Tensor(col_data, requiresGrad(a));
if (a instanceof Tensor && requiresGrad(a)) {
z.parents.push(a);
a.children.push(z);
}

z.operation = this;

return z;
}


backward(dz: Tensor, z: Tensor) {
const [a, kernel_height, kernel_width, stride, padding] = this.cache;
const [batch, channels, height, width] = a.shape;
const out_height = Math.floor((height + 2 * padding[0] - kernel_height) / stride[0]) + 1;
const out_width = Math.floor((width + 2 * padding[1] - kernel_width) / stride[1]) + 1;

// Initialize gradient tensor for dx with the same shape as input a
const dx = new Tensor(new Array(batch).fill(0).map(() =>
new Array(channels).fill(0).map(() =>
new Array(height).fill(0).map(() => new Array(width).fill(0))
)
));

// Calculate the number of elements in each patch (channels * kernel_height * kernel_width)
const patch_size = channels * kernel_height * kernel_width;

let col_index = 0;
for (let b = 0; b < batch; b++) {
for (let i = 0; i < out_height; i++) {
for (let j = 0; j < out_width; j++) {
// Extract the gradient patch for this output position
const gradient_patch = dz.data[col_index];
let patch_index = 0; // Index to iterate through the patch values

for (let c = 0; c < channels; c++) {
for (let kh = 0; kh < kernel_height; kh++) {
for (let kw = 0; kw < kernel_width; kw++) {
const h_idx = i * stride[0] - padding[0] + kh;
const w_idx = j * stride[1] - padding[1] + kw;

if (h_idx >= 0 && h_idx < height && w_idx >= 0 && w_idx < width) {
// Accumulate the gradient from the current patch position
dx.data[b][c][h_idx][w_idx] += gradient_patch[patch_index];
}
patch_index++;
}
}
}

col_index++;
}
}
}
if (a.requires_grad) {
a.backward(dx, z);
}
}
}
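
For reference, the column layout that Conv2D.forward relies on (a sketch, reusing randn as in layers.ts): for an input of shape [N, C, H, W] = [1, 3, 5, 5], a 3x3 kernel, stride [1, 1] and padding [1, 1], out_height = out_width = floor((5 + 2 - 3) / 1) + 1 = 5. img2col therefore returns a [N * out_h * out_w, C * kh * kw] = [25, 27] matrix, and the convolution reduces to a single matmul against the weights reshaped to [27, out_channels].

const x = randn([1, 3, 5, 5], false);
const cols = x.img2col(3, 3, [1, 1], [1, 1]);   // shape [25, 27]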



// <<< Tensor Operation Aliases >>> //

/**