
Commit c4e1e5e

Add files via upload
1 parent df4e427 commit c4e1e5e

17 files changed, with 1378 additions and 0 deletions.

pixel2style2pixel/models/__init__.py

Whitespace-only changes.
Two binary files not shown.

pixel2style2pixel/models/stylegan2/__init__.py

Whitespace-only changes.
Two binary files not shown.

pixel2style2pixel/models/stylegan2/model.py

+692 lines
Large diffs are not rendered by default.
@@ -0,0 +1,2 @@
from .fused_act import FusedLeakyReLU, fused_leaky_relu
from .upfirdn2d import upfirdn2d
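
This two-line __init__ re-exports the custom CUDA ops so callers can import them from the package in one step. A minimal sketch of the consumer-side import; the full package path is an assumption, only the module names fused_act and upfirdn2d are confirmed by the relative imports above:

# Hypothetical import path for the package containing this __init__.py
from models.stylegan2.op import FusedLeakyReLU, fused_leaky_relu, upfirdn2d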
Three binary files not shown.
@@ -0,0 +1,85 @@
import os

import torch
from torch import nn
from torch.autograd import Function
from torch.utils.cpp_extension import load

# JIT-compiles and loads the CUDA extension the first time this module is imported.
module_path = os.path.dirname(__file__)
fused = load(
    'fused',
    sources=[
        os.path.join(module_path, 'fused_bias_act.cpp'),
        os.path.join(module_path, 'fused_bias_act_kernel.cu'),
    ],
)


class FusedLeakyReLUFunctionBackward(Function):
    @staticmethod
    def forward(ctx, grad_output, out, negative_slope, scale):
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        empty = grad_output.new_empty(0)

        # act=3 (leaky ReLU), grad=1: gate grad_output by the sign of the
        # saved forward output `out`.
        grad_input = fused.fused_bias_act(
            grad_output, empty, out, 3, 1, negative_slope, scale
        )

        dim = [0]

        if grad_input.ndim > 2:
            dim += list(range(2, grad_input.ndim))

        grad_bias = grad_input.sum(dim).detach()

        return grad_input, grad_bias

    @staticmethod
    def backward(ctx, gradgrad_input, gradgrad_bias):
        out, = ctx.saved_tensors
        gradgrad_out = fused.fused_bias_act(
            gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale
        )

        return gradgrad_out, None, None, None


class FusedLeakyReLUFunction(Function):
    @staticmethod
    def forward(ctx, input, bias, negative_slope, scale):
        # act=3 (leaky ReLU), grad=0 (forward); the empty tensor fills the
        # unused `refer` slot.
        empty = input.new_empty(0)
        out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale)
        ctx.save_for_backward(out)
        ctx.negative_slope = negative_slope
        ctx.scale = scale

        return out

    @staticmethod
    def backward(ctx, grad_output):
        out, = ctx.saved_tensors

        grad_input, grad_bias = FusedLeakyReLUFunctionBackward.apply(
            grad_output, out, ctx.negative_slope, ctx.scale
        )

        return grad_input, grad_bias, None, None


class FusedLeakyReLU(nn.Module):
    def __init__(self, channel, negative_slope=0.2, scale=2 ** 0.5):
        super().__init__()

        self.bias = nn.Parameter(torch.zeros(channel))
        self.negative_slope = negative_slope
        self.scale = scale

    def forward(self, input):
        return fused_leaky_relu(input, self.bias, self.negative_slope, self.scale)


def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5):
    return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)
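
Usage of this module is straightforward once the extension compiles (importing it triggers torch.utils.cpp_extension.load, which needs a CUDA toolchain and device). A minimal sketch, assuming the file is importable as fused_act and the shapes are illustrative:

import torch
from fused_act import FusedLeakyReLU, fused_leaky_relu  # assumed import path

x = torch.randn(4, 512, 32, 32, device='cuda')

# Module form: owns a learnable per-channel bias (broadcast over dim 1).
act = FusedLeakyReLU(channel=512).to('cuda')
y = act(x)

# Functional form: supply the bias yourself.
bias = torch.zeros(512, device='cuda')
y2 = fused_leaky_relu(x, bias)  # scale * leaky_relu(x + bias), scale = sqrt(2)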
@@ -0,0 +1,21 @@
#include <torch/extension.h>


torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
                                int act, int grad, float alpha, float scale);

#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

torch::Tensor fused_bias_act(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
                             int act, int grad, float alpha, float scale) {
    CHECK_CUDA(input);
    CHECK_CUDA(bias);

    return fused_bias_act_op(input, bias, refer, act, grad, alpha, scale);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("fused_bias_act", &fused_bias_act, "fused bias act (CUDA)");
}
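
This binding is what the Python wrapper above calls; the activation and gradient mode are integer-coded. For reference, the forward call as it appears in the wrapper:

# act=3 selects leaky ReLU, grad=0 the forward pass; `empty` stands in
# for the unused `refer` tensor.
out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale)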
@@ -0,0 +1,99 @@
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
//
// This work is made available under the Nvidia Source Code License-NC.
// To view a copy of this license, visit
// https://nvlabs.github.io/stylegan2/license.html

#include <torch/types.h>

#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAApplyUtils.cuh>

#include <cuda.h>
#include <cuda_runtime.h>


template <typename scalar_t>
static __global__ void fused_bias_act_kernel(scalar_t* out, const scalar_t* p_x, const scalar_t* p_b, const scalar_t* p_ref,
    int act, int grad, scalar_t alpha, scalar_t scale, int loop_x, int size_x, int step_b, int size_b, int use_bias, int use_ref) {
    int xi = blockIdx.x * loop_x * blockDim.x + threadIdx.x;

    scalar_t zero = 0.0;

    for (int loop_idx = 0; loop_idx < loop_x && xi < size_x; loop_idx++, xi += blockDim.x) {
        scalar_t x = p_x[xi];

        if (use_bias) {
            x += p_b[(xi / step_b) % size_b];
        }

        scalar_t ref = use_ref ? p_ref[xi] : zero;

        scalar_t y;

        // Cases encode act * 10 + grad: act 1 is linear, act 3 is leaky ReLU;
        // grad 0/1/2 select the forward value, first and second derivative.
        switch (act * 10 + grad) {
            default:
            case 10: y = x; break;
            case 11: y = x; break;
            case 12: y = 0.0; break;

            case 30: y = (x > 0.0) ? x : x * alpha; break;
            case 31: y = (ref > 0.0) ? x : x * alpha; break;
            case 32: y = 0.0; break;
        }

        out[xi] = y * scale;
    }
}


torch::Tensor fused_bias_act_op(const torch::Tensor& input, const torch::Tensor& bias, const torch::Tensor& refer,
    int act, int grad, float alpha, float scale) {
    int curDevice = -1;
    cudaGetDevice(&curDevice);
    cudaStream_t stream = at::cuda::getCurrentCUDAStream(curDevice);

    auto x = input.contiguous();
    auto b = bias.contiguous();
    auto ref = refer.contiguous();

    int use_bias = b.numel() ? 1 : 0;
    int use_ref = ref.numel() ? 1 : 0;

    int size_x = x.numel();
    int size_b = b.numel();
    int step_b = 1;

    // The bias is broadcast over dim 1 (channels); step_b is the number of
    // elements spanned by one channel, i.e. the product of dims 2 and up.
    for (int i = 1 + 1; i < x.dim(); i++) {
        step_b *= x.size(i);
    }

    int loop_x = 4;
    int block_size = 4 * 32;
    int grid_size = (size_x - 1) / (loop_x * block_size) + 1;

    auto y = torch::empty_like(x);

    AT_DISPATCH_FLOATING_TYPES_AND_HALF(x.scalar_type(), "fused_bias_act_kernel", [&] {
        fused_bias_act_kernel<scalar_t><<<grid_size, block_size, 0, stream>>>(
            y.data_ptr<scalar_t>(),
            x.data_ptr<scalar_t>(),
            b.data_ptr<scalar_t>(),
            ref.data_ptr<scalar_t>(),
            act,
            grad,
            alpha,
            scale,
            loop_x,
            size_x,
            step_b,
            size_b,
            use_bias,
            use_ref
        );
    });

    return y;
}
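
For intuition, the act=3, grad=0 path is equivalent to adding a channel-broadcast bias, applying leaky ReLU, and multiplying by scale. A pure-PyTorch reference of that semantics (not part of the repo; a minimal sketch for checking the kernel's behavior only):

import torch
import torch.nn.functional as F

def fused_bias_act_reference(x, bias, negative_slope=0.2, scale=2 ** 0.5):
    # Broadcast bias over dim 1, matching the kernel's
    # p_b[(xi / step_b) % size_b] indexing.
    shape = [1, -1] + [1] * (x.dim() - 2)
    return F.leaky_relu(x + bias.view(*shape), negative_slope) * scale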
@@ -0,0 +1,23 @@
#include <torch/extension.h>


torch::Tensor upfirdn2d_op(const torch::Tensor& input, const torch::Tensor& kernel,
                           int up_x, int up_y, int down_x, int down_y,
                           int pad_x0, int pad_x1, int pad_y0, int pad_y1);

#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)

torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
                        int up_x, int up_y, int down_x, int down_y,
                        int pad_x0, int pad_x1, int pad_y0, int pad_y1) {
    CHECK_CUDA(input);
    CHECK_CUDA(kernel);

    return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
    m.def("upfirdn2d", &upfirdn2d, "upfirdn2d (CUDA)");
}
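
upfirdn2d is the classic upsample, FIR-filter, downsample primitive that StyleGAN2 uses for its blur and resampling layers; the CUDA op fuses the three steps into one kernel. A pure-PyTorch sketch of the semantics, assuming NCHW input, nonnegative padding, and the same up/down/pad on both axes (the binding above takes separate x/y values and allows negative padding, which crops):

import torch
import torch.nn.functional as F

def upfirdn2d_reference(x, kernel, up=1, down=1, pad=(0, 0)):
    n, c, h, w = x.shape
    kh, kw = kernel.shape
    # 1) upsample: insert (up - 1) zeros after every sample along H and W
    out = x.new_zeros(n, c, h, up, w, up)
    out[:, :, :, 0, :, 0] = x
    out = out.view(n, c, h * up, w * up)
    # 2) zero-pad the spatial dims (left/right, top/bottom)
    out = F.pad(out, [pad[0], pad[1], pad[0], pad[1]])
    # 3) filter: conv2d computes cross-correlation, so flip the FIR kernel
    weight = torch.flip(kernel, [0, 1]).view(1, 1, kh, kw).to(x)
    out = F.conv2d(out.view(n * c, 1, *out.shape[2:]), weight)
    # 4) downsample: keep every `down`-th output sample
    out = out[:, :, ::down, ::down]
    return out.view(n, c, *out.shape[2:])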
