diff --git a/.gitignore b/.gitignore index c2cc671..4addbea 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ __pycache__/ /examples/exp/ /dist/ /test/ -/asset/ \ No newline at end of file +/asset/ +.pytest_cache/ \ No newline at end of file diff --git a/LICENSE b/LICENSE index 192c8bd..693ed5d 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022 ustcml +Copyright (c) 2022 Jintao Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index f20a6ff..84b021b 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,8 @@ ## Introduction 1. [Mini-Lightning](https://github.com/ustcml/mini-lightning/) is a Lightweight machine learning training library, which is a mini version of [Pytorch-Lightning](https://www.pytorchlightning.ai/) with only 1k lines of code. It has the advantages of faster, more concise and more flexible. 2. Existing features: support for DDP(multi-node and multi-gpu), Sync-BN, DP, AMP, gradient accumulation, warmup and lr_scheduler, grad clip, tensorboard, model and result saving, beautiful console log, torchmetrics, etc. -3. Only the minimal interfaces are exposed, keeping the features of simplicity, easy to read, use and extend. Additional functions can be found in [ml_alg](https://github.com/Jintao-Huang/ml_alg) -4. examples can be found in `/examples/` +3. Only the minimal interfaces are exposed, keeping the features of simplicity, easy to read, use and extend. +4. examples can be found in `examples/` 5. If you have any problems or bug finding, please raise issue, Thank you. @@ -13,7 +13,7 @@ 1. Download the latest version(>=1.12) of Torch(corresponding CUDA version) from the [official website](https://pytorch.org/get-started/locally/) of Torch. It is not recommended to automatically install Torch (CUDA 10.2) using the Mini-Lightning dependency, which will cause CUDA version mismatch. 2. Install mini-lightning ```bash -# from pypi (v0.1.0) +# from pypi (v0.1.1) pip install mini-lightning # Or download the files from the repository to local, diff --git a/examples/cv.py b/examples/cv.py index eab8766..6bb9bb8 100644 --- a/examples/cv.py +++ b/examples/cv.py @@ -120,7 +120,7 @@ def test_step(self, batch: Any) -> None: def collect_res(seed: int) -> Dict[str, float]: ml.seed_everything(seed, gpu_dtm=False) - model = tvm.resnet50(**hparams["model_hparams"]) + model = getattr(tvm, hparams["model_name"])(**hparams["model_hparams"]) state_dict = torch.hub.load_state_dict_from_url(**hparams["model_pretrain_model"]) state_dict = ml._remove_keys(state_dict, ["fc"]) logger.info(model.load_state_dict(state_dict, strict=False)) diff --git a/examples/cv_ddp.py b/examples/cv_ddp.py index 4c101a0..0e3792a 100644 --- a/examples/cv_ddp.py +++ b/examples/cv_ddp.py @@ -135,7 +135,7 @@ def parse_opt() -> Namespace: def collect_res(seed: int) -> Dict[str, float]: # Different GPUs use different seeds. Each GPU behaves differently ml.seed_everything(seed + rank, gpu_dtm=False) - model = tvm.resnet50(**hparams["model_hparams"]) + model = getattr(tvm, hparams["model_name"])(**hparams["model_hparams"]) state_dict = torch.hub.load_state_dict_from_url(**hparams["model_pretrain_model"]) state_dict = ml._remove_keys(state_dict, ["fc"]) if rank in {-1, 0}: diff --git a/examples/cv_ddp_spawn.py b/examples/cv_ddp_spawn.py index bb6f986..fa25d1f 100644 --- a/examples/cv_ddp_spawn.py +++ b/examples/cv_ddp_spawn.py @@ -130,7 +130,7 @@ def main(rank: int, world_size: int, device_ids: List[int]) -> None: def collect_res(seed: int) -> Dict[str, float]: # Different GPUs use different seeds. Each GPU behaves differently ml.seed_everything(seed + rank, gpu_dtm=False) - model = tvm.resnet50(**hparams["model_hparams"]) + model = getattr(tvm, hparams["model_name"])(**hparams["model_hparams"]) state_dict = torch.hub.load_state_dict_from_url(**hparams["model_pretrain_model"]) state_dict = ml._remove_keys(state_dict, ["fc"]) if rank in {-1, 0}: diff --git a/examples/dqn.py b/examples/dqn.py index cac0c9d..ae9686d 100644 --- a/examples/dqn.py +++ b/examples/dqn.py @@ -195,7 +195,7 @@ def training_step(self, batch: Any) -> Tensor: if __name__ == "__main__": ml.seed_everything(42, gpu_dtm=False) batch_size = 32 - max_epochs = 10 + max_epochs = 20 hparams = { "device_ids": device_ids, "memo_capacity": 1000, diff --git a/examples/nlp.py b/examples/nlp.py index 41d699d..a84aa2e 100644 --- a/examples/nlp.py +++ b/examples/nlp.py @@ -103,6 +103,7 @@ def tokenize_function(example): dataset["train"], dataset["validation"], dataset["test"], **hparams["dataloader_hparams"]) # model = BertForSequenceClassification.from_pretrained(model_name) + ml.freeze_layers(model, ["bert.embeddings."] + [f"bert.encoder.layer.{i}." for i in range(2)], True) optimizer = getattr(optim, hparams["optim_name"])(model.parameters(), **hparams["optim_hparams"]) metrics: Dict[str, Metric] = { "loss": MeanMetric(), diff --git a/mini_lightning/mini_lightning.py b/mini_lightning/mini_lightning.py index 007fa3c..be32d5e 100644 --- a/mini_lightning/mini_lightning.py +++ b/mini_lightning/mini_lightning.py @@ -51,7 +51,7 @@ def __init__( ) -> None: """ get_core_metric: Get the core_metric for saving the model. - The higher, the better. If lower is better, you can return a negative number. + The higher, the better. If lower is better, you can return a negative number. hparams: Hyperparameters to be saved """ self.model = model @@ -478,7 +478,8 @@ def _epoch_end(self, mes: Dict[str, float], metric: Optional[float]) -> bool: is_best = True # self._remove_ckpt("last") - ckpt_fname = f"last-epoch={self.global_epoch}-metric={metric:.6f}.ckpt" + metric_str = "None" if metric is None else f"{metric:.6f}" + ckpt_fname = f"last-epoch={self.global_epoch}-metric={metric_str}.ckpt" self.last_ckpt_path = os.path.join(self.ckpt_dir, ckpt_fname) self.lmodel.save_checkpoint(self.last_ckpt_path) # 2. result saving diff --git a/mini_lightning/utils.py b/mini_lightning/utils.py index 14f32d3..be3dfb9 100644 --- a/mini_lightning/utils.py +++ b/mini_lightning/utils.py @@ -28,7 +28,7 @@ "en_parallel", "de_parallel", "de_sync_batchnorm", "select_device", "_remove_keys", "smart_load_state_dict", "test_time", "seed_everything", "time_synchronize", "multi_runs", - "print_model_info", "save_to_yaml" + "print_model_info", "save_to_yaml", "freeze_layers" ] # @@ -293,8 +293,8 @@ def print_model_info(model: Module, inputs: Optional[Tuple[Any, ...]] = None) -> s = [ f"{model.__class__.__name__}: ", f"{n_layers} Layers, ", - f"{n_params:.4f}M Params, ", - f"{n_grads:.4f}M Grads, ", # Trainable Params(no freeze) + # Grads: Trainable Params(no freeze). Params-Grads: freeze + f"{n_params:.4f}M Params ({n_grads:.4f}M Grads), ", f"{n_buffers:.4f}M Buffers", ] if inputs is not None: @@ -311,3 +311,16 @@ def print_model_info(model: Module, inputs: Optional[Tuple[Any, ...]] = None) -> def save_to_yaml(obj: Any, file_path: str, encoding: str = "utf-8", mode: str = "w") -> None: with open(file_path, mode, encoding=encoding) as f: yaml.dump(obj, f) + + +def freeze_layers(model: Module, layer_prefix_names: List[str], verbose: bool = True) -> None: + # e.g. ml.freeze_layers(model, ["bert.embeddings."] + [f"bert.encoder.layer.{i}." for i in range(2)], True) + for n, p in model.named_parameters(): + requires_grad = True + for lpn in layer_prefix_names: + if n.startswith(lpn): + requires_grad = False + break + if verbose: + logger.info(f"Setting {n}.requires_grad: {requires_grad}") + p.requires_grad_(requires_grad) diff --git a/setup.py b/setup.py index 42a9747..01c97a9 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def read_file(path: str) -> str: ] setup( name="mini-lightning", - version="0.1.1.dev0", + version="0.1.1", description=description, long_description=long_description, long_description_content_type='text/markdown', diff --git a/tests/test_all.py b/tests/test_all.py index e3c1520..d074d9a 100644 --- a/tests/test_all.py +++ b/tests/test_all.py @@ -1,9 +1,9 @@ import unittest as ut -import mini_lightning as ml # from test_lrs import * from test_utils import * from test_visualize import * +from test_mini_lightning import * # if __name__ == "__main__": # run in mini-lightning folder: `python tests/test_all.py` diff --git a/tests/test_mini_lightning.py b/tests/test_mini_lightning.py new file mode 100644 index 0000000..8771db0 --- /dev/null +++ b/tests/test_mini_lightning.py @@ -0,0 +1,11 @@ +import os +import unittest as ut + + +class TestML(ut.TestCase): + def test_ml(self): + os.system("python examples/test_env.py") + + +if __name__ == "__main__": + ut.main()