
Commit 3e0380a

Format markdown python blocks

1 parent 201c8b9 commit 3e0380a

7 files changed: +294 -165 lines changed

Diff for: .pre-commit-config.yaml (+6)

@@ -16,3 +16,9 @@ repos:
     hooks:
       - id: isort
         name: isort (python)
+  - repo: https://github.com/adamchainz/blacken-docs
+    rev: "v1.12.1"
+    hooks:
+      - id: blacken-docs
+        additional_dependencies:
+          - black==23.3.0
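
For reference, the rewrite that the new hook performs on fenced Python blocks can be reproduced with black's own Python API. This is a minimal sketch, not part of the commit; it assumes `black` is installed and uses `black.format_str`, which applies the same style rules that blacken-docs (pinned to black==23.3.0 above) applies inside Markdown code fences.

```python
import black

# One of the snippets reformatted in this commit (from vae.md), as it looked before.
snippet = "image_loss = mean((generated_image - real_image)**2)\n"

# black.format_str applies Black's style: here, spaces around the power operator;
# elsewhere in this commit, double quotes and 4-space indentation.
formatted = black.format_str(snippet, mode=black.Mode())
print(formatted)  # image_loss = mean((generated_image - real_image) ** 2)
```

Running the hook with `pre-commit run blacken-docs --all-files` should produce the kind of Markdown diffs shown below.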

Diff for: base/science-tech-maths/machine-learning/algorithms/autoencoder-vae/vae/vae.md (+1 -1)

@@ -8,7 +8,7 @@
 - Generating new images is now easy: all we need to do is sample a latent vector from the unit gaussian and pass it into the decoder.

 ```python
-image_loss = mean((generated_image - real_image)**2)
+image_loss = mean((generated_image - real_image) ** 2)
 latent_loss = kl_divergence(latent_variable, unit_gaussian)
 loss = image_loss + latent_loss
 ```

Diff for: base/science-tech-maths/machine-learning/libs-frameworks/pytorch/pytorch.md (+60 -47)

@@ -123,10 +123,10 @@ non-scalar output.
 optimizer.zero_grad()
 scaled_loss = 0
 for accumulated_step_i in range(N_STEPS):
-  out = model.forward()
-  loss = ...
-  loss.backward()
-  scaled_loss += loss.item()
+    out = model.forward()
+    loss = ...
+    loss.backward()
+    scaled_loss += loss.item()
 optimizer.step()
 actual_loss = scaled_loss / N_STEPS
 ```
@@ -146,10 +146,12 @@ torch.backends.cudnn.benchmark = False

 ```python
 for child in model.children():
-  for param in child.parameters():
-    param.requires_grad = False
+    for param in child.parameters():
+        param.requires_grad = False

-optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=...)
+optimizer = torch.optim.Adam(
+    filter(lambda p: p.requires_grad, model.parameters()), lr=...
+)
 ```

 ### Save and load weights
@@ -175,42 +177,50 @@ new_model = nn.Sequential(*list(model.children())[:-1])
 ### Get number of parameters

 ```python
-num_params = sum(p.numel() for p in model.parameters()) # Total parameters
-num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) # Trainable parameters
+num_params = sum(p.numel() for p in model.parameters())  # Total parameters
+num_trainable_params = sum(
+    p.numel() for p in model.parameters() if p.requires_grad
+)  # Trainable parameters
 ```

 ### No grad and inference_mode decorators

 ```python
 @torch.no_grad()
 def eval(model, data):
-  model.eval()
+    model.eval()
+

 @torch.inference_mode()
 def eval(model, data):
-  model.eval()
+    model.eval()
 ```

 ### Gradient clipping

 ```python
-torch.nn.utils.clip_grad_value_(parameters=model.parameters(), clip_value=1.)
+torch.nn.utils.clip_grad_value_(parameters=model.parameters(), clip_value=1.0)
 torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=2)
 ```

 ### Remove bias weight decay

 ```python
 def add_weight_decay(net, l2_value, skip_list=()):
-  decay, no_decay = [], []
-  for name, param in net.named_parameters():
-    if not param.requires_grad:
-      continue # frozen weights
-    if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list:
-      no_decay.append(param)
-    else:
-      decay.append(param)
-  return [{'params': no_decay, 'weight_decay': 0.}, {'params': decay, 'weight_decay': l2_value}]
+    decay, no_decay = [], []
+    for name, param in net.named_parameters():
+        if not param.requires_grad:
+            continue  # frozen weights
+        if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list:
+            no_decay.append(param)
+        else:
+            decay.append(param)
+    return [
+        {"params": no_decay, "weight_decay": 0.0},
+        {"params": decay, "weight_decay": l2_value},
+    ]
+
+
 params = add_weight_decay(net, 2e-5)
 sgd = torch.optim.SGD(params, lr=0.1)
 ```
@@ -244,26 +254,29 @@ print(u.grad)
 ### Weight init

 ```python
-def init_weights(net, init_type='normal', gain=0.02):
-  def init_func(m):
-    if isinstance(m, (nn.Conv2d, nn.Linear)):
-      if init_type == 'normal':
-        nn.init.normal_(m.weight.data, 0.0, gain)
-      elif init_type == 'xavier':
-        nn.init.xavier_normal_(m.weight.data, gain=gain)
-      elif init_type == 'kaiming':
-        nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
-      elif init_type == 'orthogonal':
-        nn.init.orthogonal_(m.weight.data, gain=gain)
-      else:
-        raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
-      if hasattr(m, 'bias') and m.bias is not None:
-        nn.init.constant_(m.bias.data, 0.0)
-    elif isinstance(m, nn.BatchNorm2d):
-      nn.init.normal_(m.weight.data, 1.0, gain)
-      nn.init.constant_(m.bias.data, 0.0)
-  print('initialize network with %s' % init_type)
-  net.apply(init_func)
+def init_weights(net, init_type="normal", gain=0.02):
+    def init_func(m):
+        if isinstance(m, (nn.Conv2d, nn.Linear)):
+            if init_type == "normal":
+                nn.init.normal_(m.weight.data, 0.0, gain)
+            elif init_type == "xavier":
+                nn.init.xavier_normal_(m.weight.data, gain=gain)
+            elif init_type == "kaiming":
+                nn.init.kaiming_normal_(m.weight.data, a=0, mode="fan_in")
+            elif init_type == "orthogonal":
+                nn.init.orthogonal_(m.weight.data, gain=gain)
+            else:
+                raise NotImplementedError(
+                    "initialization method [%s] is not implemented" % init_type
+                )
+            if hasattr(m, "bias") and m.bias is not None:
+                nn.init.constant_(m.bias.data, 0.0)
+        elif isinstance(m, nn.BatchNorm2d):
+            nn.init.normal_(m.weight.data, 1.0, gain)
+            nn.init.constant_(m.bias.data, 0.0)
+
+    print("initialize network with %s" % init_type)
+    net.apply(init_func)
 ```

 ### Train/test/valid splits
@@ -282,7 +295,7 @@ losses.append(loss.item()) # good
 ### Copy an array

 ```python
-a = torch.tensor([1., 2., 3.])
+a = torch.tensor([1.0, 2.0, 3.0])
 b = a  # WRONG: same reference
 b = a.clone()
 ```
@@ -292,8 +305,8 @@ b = a.clone()
 ### Construct tensors directly on GPUs

 ```python
-t = tensor.rand(2,2).cuda() # bad
-t = tensor.rand(2,2, device='cuda') # good
+t = tensor.rand(2, 2).cuda()  # bad
+t = tensor.rand(2, 2, device="cuda")  # good
 ```

 ### Avoid CPU to GPU transfers or vice-versa
@@ -318,14 +331,14 @@ Set `torch.backends.cudnn.benchmark = True` Note that cudnn.benchmark will profi
 For example, if we perform x.cos().cos(), usually we need to perform 4 global reads and writes.

 ```python
-x1 = x.cos() # Read from x in global memory, write to x1
-x2 = x1.cos() # Read from x1 in global memory, write to x2
+x1 = x.cos()  # Read from x in global memory, write to x1
+x2 = x1.cos()  # Read from x1 in global memory, write to x2
 ```

 But, with operator fusion, we only need 2 global memory reads and writes! So operator fusion will speed it up by 2x.

 ```python
-x2 = x.cos().cos() # Read from x in global memory, write to x2
+x2 = x.cos().cos()  # Read from x in global memory, write to x2
 ```

 ### Gradient checkpointing

Diff for: base/science-tech-maths/programming/algorithms/job-interviews/job-interviews.md (+1 -1)

@@ -9,7 +9,7 @@

 ```python
 def digit_length(n):
-  return 1 if n == 0 else (math.floor(math.log10(n)) + 1)
+    return 1 if n == 0 else (math.floor(math.log10(n)) + 1)
 ```

 ## Bit Tricks

Diff for: base/science-tech-maths/programming/algorithms/time-space-complexity/big-o.md (+9 -9)

@@ -54,8 +54,8 @@ Therefore $X$ insertions take $\mathcal{O}(2X)=\mathcal{O}(X)$, that we divide b

 ```python
 for i in range(N):
-  for j in range(i+1, N):
-    pass
+    for j in range(i + 1, N):
+        pass
 ```

 the first time the inner loops runs for $N-1$ steps, then $N-2$, then $N-3$, etc
@@ -72,9 +72,9 @@ Be careful if your input is two arrays $A$ and $B$, the complexity can be $O(A+B

 ```python
 for i in A:
-  for j in B:
-    for k in range(10000):
-      do work
+    for j in B:
+        for k in range(10000):
+            pass  # do work
 ```

 the complexity is $\mathcal{O}(AB)$
@@ -97,9 +97,9 @@ so the total is $\mathcal{O}(sNlog(N) + Nslog(s))$

 ```python
 def sum(node):
-  if node is None:
-    return 0
-  return sum(node.left) + sum(node.right) + value
+    if node is None:
+        return 0
+    return sum(node.left) + sum(node.right) + value
 ```

 complexity is $\mathcal{O}(branches^{depth}) = \mathcal{O}(2^{logN}) = \mathcal{O}(N)$
@@ -114,7 +114,7 @@ We have a binary tree (not search), it takes O(N) to find an element in it: $O(2

 ```python
 for i in range(N):
-  print(fib(i)) # we use recursive fib here
+    print(fib(i))  # we use recursive fib here
 ```

 iterative fib(N) is O(N), recursive is O(2^N) but don't confuse the N!!!
