Skip to content

Commit

Permalink
Merge pull request #100 from NeotomaDB/dev
Browse files Browse the repository at this point in the history
Update Sample Parquet for Data Review Tool
  • Loading branch information
tieandrews authored Jun 29, 2023
2 parents 65de6fb + c6f29b3 commit f6e6e37
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ pip install -r requirements.txt

For conda:
```bash
conda install environment.yml
conda env create -f environment.yml
```

If you plan to use the pre-built Docker images, install Docker following these [instructions](https://docs.docker.com/get-docker/)
Expand Down
23 changes: 23 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,28 @@
version: "3.9"
services:
# Service that builds and runs the article relevance prediction image.
article-relevance-prediction:
  image: metaextractor-article-relevance:v0.0.1
  build:
    dockerfile: ./docker/article-relevance/Dockerfile
    context: .
  # Settings handed to the container as environment variables. Entries with
  # nothing after "=" are passed as empty strings.
  environment:
    - N_RECENT=10
    - MIN_DATE=
    - MAX_DATE=
    - TERM=
    - AUTO_MIN_DATE=False
    - AUTO_CHECK_DUP=False
    # Must point inside the /output mount declared under volumes; any other
    # path is not bind-mounted, so results would never reach the host.
    - OUTPUT_PATH=/output/
    - SEND_XDD=False
    - DOI_FILE_PATH=/raw/gdd_api_return.json
    - MODEL_PATH=/models/logistic_regression_model.joblib

  # Host directories bind-mounted into the container (host:container).
  volumes:
    - ./data/article-relevance/outputs:/output
    - ./data/article-relevance/processed/prediction_parquet:/parquet
    - ./data/article-relevance/raw:/raw
    - ./models/article-relevance:/models

data-review-tool:
image: metaextractor-data-review-tool:v0.0.1
build:
Expand Down
10 changes: 6 additions & 4 deletions docker/article-relevance/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Use the official Python 3.10 image as the base image
FROM python:3.10

# Default container locations exposed as environment variables; the values
# match the /raw and /parquet mount points used for this image.
# NOTE(review): presumably read by run-prediction.sh — confirm.
ENV DOI_PATH="/raw"
ENV PARQUET_PATH="/parquet"
# Set the working directory inside the container
WORKDIR /app/

Expand All @@ -13,9 +15,6 @@ RUN pip install --no-cache-dir -r requirements.txt
# Copy the project's src directory into the container (not the whole repository)
COPY src ./src

# Copy the model folder into the container
COPY models/article-relevance ./models/article-relevance

# Copy the shell script to the container
COPY docker/article-relevance/run-prediction.sh .

Expand All @@ -24,7 +23,10 @@ RUN chmod +x run-prediction.sh

# Create the mount-point directories and declare them as volumes.
# VOLUME only marks the paths as mount points; host directories are attached
# at run time (e.g. through the volumes section of docker-compose.yml).
RUN mkdir -p /output
VOLUME ["/output"]
RUN mkdir -p /raw
RUN mkdir -p /parquet
RUN mkdir -p /models
VOLUME ["/output", "/parquet", "/raw", "/models"]

# Set the entry point for the Docker container
ENTRYPOINT ["./run-prediction.sh"]
12 changes: 11 additions & 1 deletion src/article_relevance/relevance_prediction_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,9 +422,19 @@ def main():
opt = docopt(__doc__)

doi_list_file_path = opt["--doi_file_path"]
model_path = opt['--model_path']
output_path = opt['--output_path']
send_xdd = opt['--send_xdd']

# # /models directory is a mounted volume, containing the model object
# models = os.listdir("/models")
# models = [f for f in models if f.endswith(".joblib")]

# if models:
# model_path = os.path.join("/models", models[0])
# else:
# model_path = ""

model_path = opt['--model_path']

metadata_df = crossref_extract(doi_list_file_path)

Expand Down

0 comments on commit f6e6e37

Please sign in to comment.