Merge pull request #35 from enesozeren/model_weight_fix

jsbroden · web-flow · commit 548d0769a3f8 · 2024-07-15T08:11:39.000+02:00
model weight usage fix
diff --git a/README.md b/README.md
@@ -128,6 +128,16 @@ To build the docker image for inference api, use
 docker build -f dockerfiles/inference_api.dockerfile . -t inference_api:latest
 ```
 
+To build the docker image for prediction, use
+```bash
+docker build -f dockerfiles/predict_model.dockerfile . -t predict_model:latest
+```
+
+To build the docker image for training, use
+```bash
+docker build -f dockerfiles/train_model.dockerfile . -t train_model:latest
+```
+
 ### Running Docker Containers
 
 To run the docker image for inference api, use
diff --git a/api/main.py b/api/main.py
@@ -12,18 +12,19 @@
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Load and clean up model on startup and shutdown."""
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     # Load the tokenizer
     tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
 
-    # Get the model from the saved checkpoint
-
+    # Load the model, taking its weights from the checkpoint at MODEL_PATH
     model = BertForSequenceClassification.from_pretrained(
-        "bert-base-uncased", num_labels=2, output_attentions=False, output_hidden_states=False
+        "bert-base-uncased",
+        num_labels=2,
+        output_attentions=False,
+        output_hidden_states=False,
+        state_dict=torch.load(MODEL_PATH, map_location=device),
     )
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
     model.eval()
-    model.to(device)
 
     # Set the model and tokenizer in the app state
     app.state.tokenizer = tokenizer
diff --git a/dockerfiles/predict_model.dockerfile b/dockerfiles/predict_model.dockerfile
@@ -14,9 +14,6 @@ COPY outputs/ outputs/
 # Set environment variable
 ENV PYTHONPATH=/lmu-mlops-project
 
-# Do not set the directory to root
-RUN pip install . --no-deps --no-cache-dir
-
 # Set the entrypoint to the python script
 ENTRYPOINT ["python3", "-u", "mlops_project/predict_model.py"]
 
diff --git a/mlops_project/models/.gitkeep b/mlops_project/models/.gitkeep
diff --git a/mlops_project/predict_model.py b/mlops_project/predict_model.py
@@ -17,17 +17,18 @@ def predict(model_path: str, dataset_path: str) -> None:
         Tensor of shape [N, d] where N is the number of samples and d is the output dimension of the model
     """
 
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     # Load the tokenizer
     tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
 
     # Load the model
     model = BertForSequenceClassification.from_pretrained(
-        "bert-base-uncased", num_labels=2, output_attentions=False, output_hidden_states=False
+        "bert-base-uncased",
+        num_labels=2,
+        output_attentions=False,
+        output_hidden_states=False,
+        state_dict=torch.load(model_path, map_location=device),
     )
-
-    # Load the model weights
-    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    model.load_state_dict(torch.load(model_path, map_location=device))
     model.eval()
 
     # Read the dataset