Skip to content

Migrate to Dockerfile and bump python version to 3.8 as 3.6 is deprecated on AWS lambda #3

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 164 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
# Build image for the xgboost AWS Lambda package: Amazon Linux 2 with
# Python 3.8 enabled from amazon-linux-extras.
FROM amazonlinux:2.0.20221004.0 AS base
# FROM amazonlinux:2016.09

# Every RUN starts a fresh shell, so a standalone `RUN set -ex` only affects
# its own (empty) layer. Configure the options on the shell itself instead:
# -e aborts a RUN at its first failing command, -x traces each command into
# the build log.
SHELL ["/bin/bash", "-e", "-x", "-c"]

# LAMBDA_PACKAGE_DIR and LAMBDA_PACKAGE_ZIP below are relative paths; pin the
# directory they resolve against so the zip always ends up at
# /lambda-package.zip.
WORKDIR /

# Set up env vars
# PYTHON_VER_YUM is the suffix used in yum package names, PYTHON_VER the
# dotted form used in interpreter and site-packages paths.
ENV PYTHON_VER_YUM="38" \
    PYTHON_VER="3.8"

# only for amazon linux images v2 - enable py3.8
# Update this container in the same layer and drop the yum cache there, so
# the cache is never stored in the image.
RUN amazon-linux-extras enable python${PYTHON_VER} && \
    yum -y update && \
    yum clean all && \
    rm -rf /var/cache/yum

# Where the package is staged, and the name of the final archive.
ENV LAMBDA_PACKAGE_DIR="outputs/lambda-package" \
    LAMBDA_PACKAGE_ZIP="lambda-package.zip" \
    SITE_PACKAGES_DIR="/usr/local/lib64/python${PYTHON_VER}/site-packages"
# Separate instruction: a variable cannot reference another one that is set
# in the same ENV instruction.
ENV LIB_DIR="${LAMBDA_PACKAGE_DIR}/lib"

# Yum-install the non-pip packages: compiler toolchain, BLAS/LAPACK/ATLAS
# development libraries, archive tools and the Python interpreter with pip.
# The yum cache is removed in the same layer so it is not stored in the image.
# NOTE(review): gfortran, libatlas-base-dev and python${PYTHON_VER_YUM}-dev
# look like Debian-style package names; yum presumably reports them as
# unavailable and carries on - confirm and drop them if so.
RUN yum -y install \
        atlas-devel \
        atlas-sse3-devel \
        binutils \
        blas-devel \
        findutils \
        gcc \
        gcc-c++ \
        gfortran \
        git \
        gzip \
        lapack-devel \
        libatlas-base-dev \
        make \
        pkgconfig \
        python${PYTHON_VER_YUM} \
        python${PYTHON_VER_YUM}-dev \
        python${PYTHON_VER_YUM}-devel \
        python${PYTHON_VER_YUM}-pip \
        python${PYTHON_VER_YUM}-virtualenv \
        tar \
        wget \
        zip && \
    yum clean all && \
    rm -rf /var/cache/yum

# Pinned versions of the numeric stack bundled into the Lambda package.
# (The previous build used SCIPY_VER='0.19.1'.)
ENV NUMPY_VER='1.19.5' \
    SCIPY_VER='1.5.4'

# Pip-install the Python packages.
# - The packaging tools are upgraded first, in their own pip invocation,
#   because that step replaces pip itself.
# - numpy and scipy are installed with pip's default wheel handling; to
#   compile one of them locally instead, add e.g. `--no-binary numpy`.
# - xgboost is installed with --target, presumably so that it lands in
#   SITE_PACKAGES_DIR, the directory that is later copied into the package.
#   joblib could be added the same way.
# - --no-cache-dir keeps pip's download cache out of the image layer.
RUN /usr/bin/python${PYTHON_VER} -m pip install --no-cache-dir --upgrade \
        Cython==0.29.32 \
        pip==22.3 \
        setuptools==53.1.0 && \
    /usr/bin/python${PYTHON_VER} -m pip install --no-cache-dir \
        numpy==${NUMPY_VER} && \
    /usr/bin/python${PYTHON_VER} -m pip install --no-cache-dir \
        scipy==${SCIPY_VER} && \
    /usr/bin/python${PYTHON_VER} -m pip install --no-cache-dir \
        --target ${SITE_PACKAGES_DIR} \
        xgboost==0.90

# Verify the installation: print the interpreter and package versions. The
# build stops here if any of the imports fails.
# np.__config__.show() prints the build configuration itself and returns
# None, so it is called directly instead of being wrapped in print().
# (A joblib check can be added here if joblib is installed again.)
RUN /usr/bin/python${PYTHON_VER} -V && \
    /usr/bin/python${PYTHON_VER} -c "import numpy as np; print(np.version.version)" && \
    /usr/bin/python${PYTHON_VER} -c "import numpy as np; np.__config__.show()" && \
    /usr/bin/python${PYTHON_VER} -c "import scipy as sp; print(sp.version.version)" && \
    /usr/bin/python${PYTHON_VER} -c "import xgboost; print(xgboost.__version__)"

# Stage the package: create ${LAMBDA_PACKAGE_DIR} with its lib/ subdirectory
# (mkdir -p on LIB_DIR creates both), list site-packages in the build log,
# copy its contents to the top level of the package, then copy the native
# ATLAS/BLAS/LAPACK/Fortran/OpenMP shared libraries into lib/.
# NOTE(review): presumably the Lambda runtime searches /var/task/lib for
# shared libraries, which is why they are placed in lib/ - confirm.
RUN mkdir -p "${LIB_DIR}" && \
    ls "${SITE_PACKAGES_DIR}" && \
    cp -rf "${SITE_PACKAGES_DIR}"/* "${LAMBDA_PACKAGE_DIR}" && \
    cp \
        /usr/lib64/atlas/libsatlas.so.3 \
        /usr/lib64/atlas/libtatlas.so.3 \
        /usr/lib64/libblas.so.3 \
        /usr/lib64/liblapack.so.3 \
        /usr/lib64/libquadmath.so.0 \
        /usr/lib64/libgfortran.so.4 \
        /usr/lib64/libgomp.so.1 \
        "${LIB_DIR}"

# Reduce the package size. Everything runs in one RUN so the image does not
# accumulate one layer per removal; the size is printed before and after.
#
# 1. rm -rf: distribution info directories, bin/, pyximport, the xgboost
#    build leftovers (dmlc-core, rabit, make) and the scipy parts the package
#    was found to work without (scipy.libs, fftpack, integrate, interpolate,
#    signal, spatial, special, stats).
#    Not removed (yet): ${LAMBDA_PACKAGE_DIR}/Cython and cython*.
# 2. find -prune -exec rm: anything named tests, src or doc, plus every
#    directory matching [Tt]est* or named __pycache__. -prune keeps find from
#    descending into a tree that is about to be deleted, and -exec ... {} +
#    is safe for paths containing whitespace, unlike `find | xargs`.
# 3. find -delete: *.pyc files and everything named *.txt or *.md.
# 4. strip excess from the remaining compiled .so files. With -exec ... {} +
#    strip is not invoked at all when nothing matches.
#    If strip produces "ELF load command address/offset not properly aligned"
#    errors, use `strip --strip-unneeded` and exclude the scipy libraries:
#    find ${LAMBDA_PACKAGE_DIR} -name "*.so" -not -path "${LAMBDA_PACKAGE_DIR}*scipy*/*" | xargs strip --strip-unneeded
RUN du -sh "${LAMBDA_PACKAGE_DIR}" && \
    rm -rf \
        "${LAMBDA_PACKAGE_DIR}"/*.egg-info \
        "${LAMBDA_PACKAGE_DIR}"/*.dist-info \
        "${LAMBDA_PACKAGE_DIR}/bin/" \
        "${LAMBDA_PACKAGE_DIR}/pyximport" \
        "${LAMBDA_PACKAGE_DIR}/xgboost/dmlc-core" \
        "${LAMBDA_PACKAGE_DIR}/xgboost/rabit/" \
        "${LAMBDA_PACKAGE_DIR}/xgboost/make/" \
        "${LAMBDA_PACKAGE_DIR}/scipy.libs" \
        "${LAMBDA_PACKAGE_DIR}/scipy/fftpack" \
        "${LAMBDA_PACKAGE_DIR}/scipy/integrate" \
        "${LAMBDA_PACKAGE_DIR}/scipy/interpolate" \
        "${LAMBDA_PACKAGE_DIR}/scipy/signal" \
        "${LAMBDA_PACKAGE_DIR}/scipy/spatial" \
        "${LAMBDA_PACKAGE_DIR}/scipy/special" \
        "${LAMBDA_PACKAGE_DIR}/scipy/stats" && \
    find "${LAMBDA_PACKAGE_DIR}" \
        \( -name tests -o -name src -o -name doc \
           -o -type d \( -name '[Tt]est*' -o -name '__pycache__' \) \) \
        -prune -exec rm -rf {} + && \
    find "${LAMBDA_PACKAGE_DIR}" \
        \( -type f -name '*.pyc' -o -name '*.txt' -o -name '*.md' \) \
        -delete && \
    find "${LAMBDA_PACKAGE_DIR}" -name '*.so' -exec strip {} + && \
    du -sh "${LAMBDA_PACKAGE_DIR}"

# Compress the staged package into /${LAMBDA_PACKAGE_ZIP} and print its size.
# The steps are chained with && so that a failing zip fails the build; the
# earlier `pushd ... && zip ... ; popd` form reported only popd's exit status.
# The cd only lasts for this RUN, so there is nothing to pop afterwards.
# zip runs inside the package directory so archive paths are relative to the
# package root; `*` leaves out top-level dotfiles.
RUN cd "${LAMBDA_PACKAGE_DIR}" && \
    zip -r9q "/${LAMBDA_PACKAGE_ZIP}" * && \
    du -sh "/${LAMBDA_PACKAGE_ZIP}"

# USAGE TIPS:
#   Add your lambda function handler module to the top level of
#   ${LAMBDA_PACKAGE_ZIP} (optionally including the .pyc file in __pycache__)
#   --OR--
#   Add your lambda function handler module to the top level of
#   ${LAMBDA_PACKAGE_DIR} (optionally including the .pyc file in __pycache__)
#   and zip with maximum compression
13 changes: 5 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
# aws_lambda_xgboost
AWS Lambda Deployment Xgboost package Python 3.6
AWS Lambda Deployment Xgboost package Python 3.8

# Description

AWS Lambda imposes a *50Mb* limit on a zip upload and a *~262Mb* limit on the total unzipped size when deploying from AWS S3. This script lets you build a package that contains the xgboost library with all of its dependencies and joblib.

# Notes

- The final size of the files in the zip is about 254M, which leaves you about 8M for extra files and packages
- In most cases it's impossible to fit your models into the remaining 8Mb of the zip file (262 - 254), so here is a solution that uses the boto3 library to load models from s3 buckets
- The final size of the files in the zip is about 250M, which leaves you about 12M for extra files and packages
- In most cases it's impossible to fit your models into the remaining 12Mb of the zip file (262 - 250), so here is a solution that uses the boto3 library to load models from s3 buckets
- The model load call should come before lambda_function, so that the model is loaded once and no time is wasted loading it again on later calls
- In the code it's suggested to load the model into the local AWS Lambda /tmp/ directory. That directory is limited to 500Mb

Expand All @@ -19,7 +19,7 @@ We all know with AWS Lambda *50Mb* limit on zip upload and *~262Mb* limit from A
2. Run this command from the repo folder

```{bash}
docker run -v $(pwd):/outputs -it amazonlinux:2016.09 /bin/bash /outputs/build.sh
sh build.sh
```

3. This generates the folder __lambda-package__ and the zip file __lambda-package.zip__
Expand All @@ -42,7 +42,4 @@ docker run -v $(pwd):/outputs -it amazonlinux:2016.09 /bin/bash /outputs/build.s

# Credits

[Ryan Brown](https://github.com/ryansb) for providing original packaging sklearn and numpy [example](https://github.com/ryansb/sklearn-build-lambda)
Here it was used [modified version](https://github.com/ryansb/sklearn-build-lambda/pull/16/commits/75c713d23107300370b16b134936b959f1f0f73b) to Python 3.6 from [Mark Campanelli](https://github.com/markcampanelli)

[Jing Xie](https://www.linkedin.com/in/jing-xie-4a307012/) and [Ken Mcdonnell](https://www.linkedin.com/in/ken-mcdonnell-b438b237/) for helping with deploying models and debugging.
[Alexey Butyrev](https://github.com/alexeybutyrev) for providing original xgb packaging in [origin repo](https://github.com/alexeybutyrev/aws_lambda_xgboost)
110 changes: 17 additions & 93 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,93 +1,17 @@
#!/bin/bash
# Legacy build script: yum-installs the toolchain and Python 3.6, pip-installs
# numpy, scipy, xgboost and joblib, copies site-packages plus native libraries
# into ${LAMBDA_PACKAGE_DIR}, trims the result and zips it.
# It uses yum and relative outputs/... paths, so it presumably runs inside an
# Amazon Linux container from / with the repository mounted at /outputs -
# confirm against the documented docker run command.
#
# -e aborts on the first failing command; -x traces every command.
# The `echo "..." > /dev/null 2>&1` lines print nothing themselves; with -x
# the shell still shows them in its trace, so they act as progress markers.
set -ex

# Update this container
echo "Yum updating container..." > /dev/null 2>&1
yum -y update
echo "Yum updating container...done" > /dev/null 2>&1

# Set up env vars
# PYTHON_VER_YUM is the suffix used in yum package names, PYTHON_VER the
# dotted form used in the pip/python executable names and site-packages path.
# SKLEARN_VER is not referenced anywhere else in this script.
PYTHON_VER_YUM='36'
PYTHON_VER='3.6'
NUMPY_VER='1.13.3'
SCIPY_VER='0.19.1'
SKLEARN_VER='0.21.3'

# Staging directory, its lib/ subdirectory for native libraries, and the
# archive name and path (all relative to the current directory).
LAMBDA_PACKAGE_DIR='outputs/lambda-package'
LIB_DIR="${LAMBDA_PACKAGE_DIR}/lib"
LAMBDA_PACKAGE_ZIP='lambda-package.zip'
LAMBDA_PACKAGE_ZIP_RELPATH="outputs/${LAMBDA_PACKAGE_ZIP}"

SITE_PACKAGES_DIR="/usr/local/lib64/python${PYTHON_VER}/site-packages"

echo "Yum installing non-pip packages..." > /dev/null 2>&1
yum -y install \
atlas-devel \
atlas-sse3-devel \
blas-devel \
findutils \
gcc \
gcc-c++ \
lapack-devel \
python${PYTHON_VER_YUM}-devel \
zip
echo "Yum installing non-pip packages...done" > /dev/null 2>&1

# numpy and scipy are installed with --no-binary, i.e. compiled locally
# rather than taken from wheels; xgboost and joblib are installed straight
# into SITE_PACKAGES_DIR via --target.
echo "Pip installing packages using local compilation for numpy and scipy..." > /dev/null 2>&1
/usr/bin/pip-${PYTHON_VER} install --upgrade pip==9.0.3 setuptools
/usr/bin/pip-${PYTHON_VER} install --no-binary numpy numpy==${NUMPY_VER}
/usr/bin/pip-${PYTHON_VER} install --no-binary scipy scipy==${SCIPY_VER}
/usr/bin/pip-${PYTHON_VER} install --target $SITE_PACKAGES_DIR xgboost==0.90
/usr/bin/pip-${PYTHON_VER} install --target $SITE_PACKAGES_DIR joblib
echo "Pip installing packages using local compilation for numpy and scipy...done" > /dev/null 2>&1

# Import each package and print its version; with -e a failed import aborts
# the script.
echo "Verfifying installation..." > /dev/null 2>&1
/usr/bin/python${PYTHON_VER} -V
/usr/bin/python${PYTHON_VER} -c "import numpy as np; print(np.version.version)"
/usr/bin/python${PYTHON_VER} -c "import numpy as np; print(np.__config__.show())"
/usr/bin/python${PYTHON_VER} -c "import scipy as sp; print(sp.version.version)"
/usr/bin/python${PYTHON_VER} -c "import xgboost; print(xgboost.__version__)"
/usr/bin/python${PYTHON_VER} -c "import joblib; print(joblib.__version__)"

echo "Verfifying installation...done" > /dev/null 2>&1

# mkdir -p on LIB_DIR also creates LAMBDA_PACKAGE_DIR, its parent.
echo "Preparing ${LIB_DIR}..." > /dev/null 2>&1
mkdir -p ${LIB_DIR}
echo "Preparing ${LIB_DIR}...done" > /dev/null 2>&1
ls $SITE_PACKAGES_DIR
echo "Copying ${SITE_PACKAGES_DIR} contents to ${LAMBDA_PACKAGE_DIR}..." > /dev/null 2>&1
cp -rf ${SITE_PACKAGES_DIR}/* ${LAMBDA_PACKAGE_DIR}
echo "Copying ${SITE_PACKAGES_DIR} contents to ${LAMBDA_PACKAGE_DIR}...done" > /dev/null 2>&1

# Native libraries go into lib/ inside the package.
echo "Copying compiled libraries to ${LIB_DIR}..." > /dev/null 2>&1
cp /usr/lib64/atlas/* ${LIB_DIR}
cp /usr/lib64/libquadmath.so.0 ${LIB_DIR}
cp /usr/lib64/libgfortran.so.3 ${LIB_DIR}
echo "Copying compiled libraries to ${LIB_DIR}...done" > /dev/null 2>&1

echo "Reducing package size..." > /dev/null 2>&1
echo "Original unzipped package size: $(du -sh ${LAMBDA_PACKAGE_DIR} | cut -f1)" > /dev/null 2>&1
# Remove README
# (no -f: with -e the script aborts here if the file does not exist)
rm ${LAMBDA_PACKAGE_DIR}/README
# Remove distribution info directories
rm -rf ${LAMBDA_PACKAGE_DIR}/*.egg-info
rm -rf ${LAMBDA_PACKAGE_DIR}/*.dist-info
# Remove all testing directories
find ${LAMBDA_PACKAGE_DIR} -name tests | xargs rm -rf
# strip excess from compiled .so files
find ${LAMBDA_PACKAGE_DIR} -name "*.so" | xargs strip
echo "Final unzipped package size: $(du -sh ${LAMBDA_PACKAGE_DIR} | cut -f1)" > /dev/null 2>&1
echo "Reducing package size...done" > /dev/null 2>&1

# zip runs inside the package directory so archive paths are relative to the
# package root; the archive is written to the absolute path
# /${LAMBDA_PACKAGE_ZIP_RELPATH}. popd runs whether or not pushd/zip succeeded.
echo "Compressing packages into ${LAMBDA_PACKAGE_ZIP}..." > /dev/null 2>&1
pushd ${LAMBDA_PACKAGE_DIR} > /dev/null 2>&1 && zip -r9q /${LAMBDA_PACKAGE_ZIP_RELPATH} * ; popd > /dev/null 2>&1
echo "lambda-package.zip size: $(du -sh ${LAMBDA_PACKAGE_ZIP_RELPATH} | cut -f1)" > /dev/null 2>&1
echo "Compressing packages into lambda-package.zip...done" > /dev/null 2>&1

echo "SUCCESS!!!" > /dev/null 2>&1

echo "USAGE TIPS:" > /dev/null 2>&1
echo " Add your lambda function handler module to the top level of ${LAMBDA_PACKAGE_ZIP_RELPATH} (optionally including the .pyc file in __pycache__)" > /dev/null 2>&1
echo " --OR--" > /dev/null 2>&1
echo " Add your lambda function handler module to the top level of ${LAMBDA_PACKAGE_DIR} (optionally including the .pyc file in __pycache__) and zip with maximum compression" > /dev/null 2>&1

#!/bin/sh
# Builds the xgboost Lambda package with Docker, extracts it from the image
# and smoke-tests it in a Lambda-like container.
# Run from the repository root: sh build.sh

# Stop at the first failing step, so a failed `docker build` is never followed
# by packaging and testing a stale image; -u turns a mistyped variable name
# into an error.
set -eu

# Start clean: remove the lambda-package/ directory and lambda-package.zip
# left by an earlier run.
rm -rf lambda-packag*

docker build --platform linux/amd64 -t xgb_lambda .

# Create (but never start) a container just to copy the archive out of the
# image. The trap removes it again when the script exits, on success or
# failure, so repeated builds do not pile up stopped containers.
id=$(docker create xgb_lambda)
trap 'docker rm -f "$id" > /dev/null 2>&1' EXIT
docker cp "$id":/lambda-package.zip ./

# Unpack the archive next to it and add the test handler module.
mkdir lambda-package
unzip lambda-package.zip -d lambda-package
cp lambda_xgb_version.py lambda-package/

# Invoke the handler with the package mounted read-only as the task root.
# A layer can be mounted as well by adding: -v <layer_dir>:/opt:ro,delegated
docker run --rm \
    -v "$PWD"/lambda-package:/var/task:ro,delegated \
    lambci/lambda:python3.8 \
    lambda_xgb_version.test
Binary file modified lambda-package.zip
Binary file not shown.
7 changes: 7 additions & 0 deletions lambda_xgb_version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import numpy as np
import scipy
import xgboost as xgb


def test(event, context):
    """Lambda handler used as a smoke test for the packaged libraries.

    Importing this module already proves that numpy, scipy and xgboost load;
    the handler itself only reports which xgboost version was packaged.

    Args:
        event: Invocation payload; not used.
        context: Invocation context; not used.

    Returns:
        The value of ``xgb.__version__``.
    """
    packaged_version = xgb.__version__
    return packaged_version