Merge pull request #33 from enesozeren/train_script_fix

jsbroden · web-flow · commit 37fca301e9c4 · 2024-07-14T17:17:53.000+02:00
Train script fix
diff --git a/README.md b/README.md
@@ -86,7 +86,7 @@ Note: You need GCP bucket permissions to be able to run this command
 Predictions from this script are saved to the outputs directory. To make a prediction, use:
 ```bash
 python mlops_project/predict_model.py \
---model_path=/your/model/path.txt \
+--model_path=/your/model/path.pth \
 --dataset_path=/your/data/path.txt
 ```
 
@@ -137,13 +137,20 @@ docker run -p 8080:8080 -e PORT=8080 inference_api:latest
 
 You can also use the predict_model docker image by mounting your model weights and dataset from your machine into the container:
 ```bash
-docker run -v /home/user/models:/container/models \
-           -v /home/user/data:/container/data \
+docker run -v /to/your/model/weight/path/best-checkpoint.pth:/container/models/best-checkpoint.pth \
+           -v /to/your/test_path/test_text.txt:/container/data/test_text.txt \
+           -v /to/your/outputs/predictions:/lmu-mlops-project/outputs/predictions \
            predict_model:latest \
-           --model_path /container/models/model.pth \
+           --model_path /container/models/best-checkpoint.pth \
            --dataset_path /container/data/test_text.txt
 ```
 
+To run the training docker container, use:
+```bash
+docker run -e WANDB_API_KEY=your_wandb_api_key \
+train_model:latest --config=mlops_project/config/config-defaults.yaml
+```
+
 ## Tests
 
 Unit tests for this repo can be found in the ``tests/`` directory.
diff --git a/dockerfiles/train_model.dockerfile b/dockerfiles/train_model.dockerfile
@@ -1,10 +1,11 @@
 # Base image
 FROM hatespeech-base
 
+WORKDIR /lmu-mlops-project
+
 COPY pyproject.toml pyproject.toml
 COPY mlops_project/ mlops_project/
 COPY utils/ mlops_project/utils/
 COPY data/ data/
 
-WORKDIR /mlops_project
-ENTRYPOINT ["python3", "-u", "train_model.py"]
+ENTRYPOINT ["python3", "-u", "mlops_project/train_model.py"]
diff --git a/mlops_project/predict_model.py b/mlops_project/predict_model.py
@@ -26,7 +26,8 @@ def predict(model_path: str, dataset_path: str) -> None:
     )
 
     # Load the model weights
-    model.load_state_dict(torch.load(model_path))
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    model.load_state_dict(torch.load(model_path, map_location=device))
     model.eval()
 
     # Set the device to GPU if available
diff --git a/mlops_project/train_model.py b/mlops_project/train_model.py
@@ -68,15 +68,13 @@ def main():
     train_dataloader = DataLoader(
         train_set,
         worker_init_fn=seed_worker,
-        num_workers=7,
         generator=g,
         sampler=RandomSampler(train_set),
         batch_size=wandb.config.BATCH_SIZE,
     )
     validation_dataloader = DataLoader(
         val_set,
         worker_init_fn=seed_worker,
-        num_workers=7,
         generator=g,
         sampler=SequentialSampler(val_set),
         batch_size=wandb.config.BATCH_SIZE,

Original file line number	Diff line number	Diff line change
`@@ -26,7 +26,8 @@ def predict(model_path: str, dataset_path: str) -> None:`
`26`	`26`	`)`
`27`	`27`
`28`	`28`	`# Load the model weights`
`29`		`- model.load_state_dict(torch.load(model_path))`
	`29`	`+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")`
	`30`	`+ model.load_state_dict(torch.load(model_path, map_location=device))`
`30`	`31`	`model.eval()`
`31`	`32`
`32`	`33`	`# Set the device to GPU if available`