@@ -123,10 +123,10 @@ non-scalar output.
optimizer.zero_grad()
scaled_loss = 0
for accumulated_step_i in range(N_STEPS):
-    out = model.forward()
-    loss = ...
-    loss.backward()
-    scaled_loss += loss.item()
+    out = model.forward()
+    loss = ...
+    loss.backward()
+    scaled_loss += loss.item()
optimizer.step()
actual_loss = scaled_loss / N_STEPS
```
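
For context, a minimal sketch of the same pattern applied over a `DataLoader` (here `loader`, `criterion`, and the `(inputs, targets)` batch format are placeholders); dividing the loss by `N_STEPS` before `backward()` makes the accumulated gradient match an average over the effective batch:

``` python
optimizer.zero_grad()
for step, (inputs, targets) in enumerate(loader):
    loss = criterion(model(inputs), targets) / N_STEPS  # scale so gradients average out
    loss.backward()                                      # gradients accumulate in .grad
    if (step + 1) % N_STEPS == 0:
        optimizer.step()
        optimizer.zero_grad()
```
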
@@ -146,10 +146,12 @@ torch.backends.cudnn.benchmark = False

``` python
for child in model.children():
-    for param in child.parameters():
-        param.requires_grad = False
+    for param in child.parameters():
+        param.requires_grad = False

-optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=...)
+optimizer = torch.optim.Adam(
+    filter(lambda p: p.requires_grad, model.parameters()), lr=...
+)
```

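The loop above freezes every child; more commonly you freeze only a pretrained backbone and leave a new head trainable. A hedged sketch, assuming a recent torchvision and a placeholder `num_classes`:

``` python
import torch
import torch.nn as nn
from torchvision import models

num_classes = 10  # placeholder

model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)  # pretrained backbone
for param in model.parameters():
    param.requires_grad = False  # freeze everything
model.fc = nn.Linear(model.fc.in_features, num_classes)  # new head, trainable by default

optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()), lr=1e-3
)
```
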
### Save and load weights
@@ -175,42 +177,50 @@ new_model = nn.Sequential(*list(model.children())[:-1])
### Get number of parameters

``` python
-num_params = sum(p.numel() for p in model.parameters())  # Total parameters
-num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)  # Trainable parameters
+num_params = sum(p.numel() for p in model.parameters())  # Total parameters
+num_trainable_params = sum(
+    p.numel() for p in model.parameters() if p.requires_grad
+)  # Trainable parameters
```
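
A quick way to report the two counts computed above:

``` python
print(f"trainable parameters: {num_trainable_params:,} / {num_params:,}")
```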

### No grad and inference_mode decorators

``` python
@torch.no_grad()
def eval(model, data):
-    model.eval()
+    model.eval()
+

@torch.inference_mode()
def eval(model, data):
-    model.eval()
+    model.eval()
```
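
Both also work as context managers, which is convenient for wrapping only the forward pass; a minimal sketch (`model` and `data` are placeholders):

``` python
model.eval()
with torch.inference_mode():  # or torch.no_grad()
    preds = model(data)
```

`inference_mode` is the stricter of the two: tensors created under it cannot be used in autograd afterwards, which is what lets it skip more bookkeeping.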

### Gradient clipping

``` python
-torch.nn.utils.clip_grad_value_(parameters=model.parameters(), clip_value=1.)
+torch.nn.utils.clip_grad_value_(parameters=model.parameters(), clip_value=1.0)
torch.nn.utils.clip_grad_norm_(parameters, max_norm, norm_type=2)
```
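
Clipping operates on the gradients, so it must run after `loss.backward()` and before `optimizer.step()`; a minimal sketch of the ordering (`criterion`, `inputs`, and `targets` are placeholders):

``` python
optimizer.zero_grad()
loss = criterion(model(inputs), targets)
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # after backward ...
optimizer.step()                                                  # ... before step
```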

### Remove bias weight decay

``` python
def add_weight_decay(net, l2_value, skip_list=()):
-    decay, no_decay = [], []
-    for name, param in net.named_parameters():
-        if not param.requires_grad:
-            continue  # frozen weights
-        if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list:
-            no_decay.append(param)
-        else:
-            decay.append(param)
-    return [{'params': no_decay, 'weight_decay': 0.}, {'params': decay, 'weight_decay': l2_value}]
+    decay, no_decay = [], []
+    for name, param in net.named_parameters():
+        if not param.requires_grad:
+            continue  # frozen weights
+        if len(param.shape) == 1 or name.endswith(".bias") or name in skip_list:
+            no_decay.append(param)
+        else:
+            decay.append(param)
+    return [
+        {"params": no_decay, "weight_decay": 0.0},
+        {"params": decay, "weight_decay": l2_value},
+    ]
+
+

params = add_weight_decay(net, 2e-5)
sgd = torch.optim.SGD(params, lr=0.1)
```
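
The 1-D check catches normalization-layer weights as well as biases, the parameters that weight decay commonly hurts. As a quick sanity check (with the same placeholder `net`), you can list which parameter names land in the no-decay group:

``` python
no_decay_names = [
    name
    for name, param in net.named_parameters()
    if param.requires_grad and (len(param.shape) == 1 or name.endswith(".bias"))
]
print(no_decay_names)  # typically biases plus BatchNorm/LayerNorm weights
```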
@@ -244,26 +254,29 @@ print(u.grad)
### Weight init

``` python
-def init_weights(net, init_type='normal', gain=0.02):
-    def init_func(m):
-        if isinstance(m, (nn.Conv2d, nn.Linear)):
-            if init_type == 'normal':
-                nn.init.normal_(m.weight.data, 0.0, gain)
-            elif init_type == 'xavier':
-                nn.init.xavier_normal_(m.weight.data, gain=gain)
-            elif init_type == 'kaiming':
-                nn.init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
-            elif init_type == 'orthogonal':
-                nn.init.orthogonal_(m.weight.data, gain=gain)
-            else:
-                raise NotImplementedError('initialization method [%s] is not implemented' % init_type)
-            if hasattr(m, 'bias') and m.bias is not None:
-                nn.init.constant_(m.bias.data, 0.0)
-        elif isinstance(m, nn.BatchNorm2d):
-            nn.init.normal_(m.weight.data, 1.0, gain)
-            nn.init.constant_(m.bias.data, 0.0)
-    print('initialize network with %s' % init_type)
-    net.apply(init_func)
+def init_weights(net, init_type="normal", gain=0.02):
+    def init_func(m):
+        if isinstance(m, (nn.Conv2d, nn.Linear)):
+            if init_type == "normal":
+                nn.init.normal_(m.weight.data, 0.0, gain)
+            elif init_type == "xavier":
+                nn.init.xavier_normal_(m.weight.data, gain=gain)
+            elif init_type == "kaiming":
+                nn.init.kaiming_normal_(m.weight.data, a=0, mode="fan_in")
+            elif init_type == "orthogonal":
+                nn.init.orthogonal_(m.weight.data, gain=gain)
+            else:
+                raise NotImplementedError(
+                    "initialization method [%s] is not implemented" % init_type
+                )
+            if hasattr(m, "bias") and m.bias is not None:
+                nn.init.constant_(m.bias.data, 0.0)
+        elif isinstance(m, nn.BatchNorm2d):
+            nn.init.normal_(m.weight.data, 1.0, gain)
+            nn.init.constant_(m.bias.data, 0.0)
+
+    print("initialize network with %s" % init_type)
+    net.apply(init_func)
```
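
A minimal usage sketch (the toy model is a placeholder; `apply` recurses over all submodules):

``` python
import torch.nn as nn

model = nn.Sequential(nn.Linear(10, 20), nn.ReLU(), nn.Linear(20, 2))
init_weights(model, init_type="xavier", gain=0.02)
```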

### Train/test/valid splits
@@ -282,7 +295,7 @@ losses.append(loss.item()) # good
### Copy an array

``` python
-a = torch.tensor([1., 2., 3.])
+a = torch.tensor([1.0, 2.0, 3.0])
b = a # WRONG: same reference
b = a.clone()
```
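
Note that `clone()` is differentiable and stays connected to the source tensor's autograd graph; for a fully independent copy of a tensor that requires grad, the usual pattern is `detach().clone()`:

``` python
import torch

a = torch.rand(3, requires_grad=True)
b = a.clone()           # copy, but still part of a's autograd graph
c = a.detach().clone()  # independent copy with no grad history
```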
@@ -292,8 +305,8 @@ b = a.clone()
### Construct tensors directly on GPUs

``` python
-t = torch.rand(2,2).cuda() # bad
-t = torch.rand(2,2, device='cuda') # good
+t = torch.rand(2, 2).cuda()  # bad
+t = torch.rand(2, 2, device="cuda")  # good
```
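
A common companion pattern, so the same code runs with or without a GPU:

``` python
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t = torch.rand(2, 2, device=device)
```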

### Avoid CPU to GPU transfers or vice-versa
@@ -318,14 +331,14 @@ Set `torch.backends.cudnn.benchmark = True` Note that cudnn.benchmark will profi
For example, if we compute `x.cos().cos()`, we usually need to perform 4 global memory reads and writes.

``` python
-x1 = x.cos() # Read from x in global memory, write to x1
-x2 = x1.cos() # Read from x1 in global memory, write to x2
+x1 = x.cos()  # Read from x in global memory, write to x1
+x2 = x1.cos()  # Read from x1 in global memory, write to x2
```

But with operator fusion, we only need 2 global memory reads and writes, so operator fusion will speed it up by 2x.

``` python
-x2 = x.cos().cos() # Read from x in global memory, write to x2
+x2 = x.cos().cos()  # Read from x in global memory, write to x2
```
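
Pointwise chains like this are exactly what a fusing compiler targets. A hedged sketch using `torch.compile` (assuming PyTorch 2.x and a CUDA device), which can emit a single fused kernel for the two `cos` calls:

``` python
import torch

def f(x):
    return x.cos().cos()

f_compiled = torch.compile(f)  # JIT-compiles f; pointwise ops like cos can be fused
x = torch.rand(1_000_000, device="cuda")
y = f_compiled(x)  # ideally one fused kernel: read x once, write y once
```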

### Gradient checkpointing