From bbd39eba87c0ec899950f3dfb96fbf23de05a949 Mon Sep 17 00:00:00 2001
From: Chengjie Li <109656400+ChengjieLi28@users.noreply.github.com>
Date: Fri, 1 Nov 2024 15:46:18 +0800
Subject: [PATCH] BLD: Remove Python 3.8 & Support Python 3.12 (#2503)

---
 .github/workflows/docker-cd.yaml            | 20 ------------------
 .github/workflows/python.yaml               | 21 ++++++++++++-------
 pyproject.toml                              |  7 ++++---
 setup.cfg                                   |  4 ++--
 .../core/tests/test_continuous_batching.py  | 14 +++----------
 5 files changed, 23 insertions(+), 43 deletions(-)

diff --git a/.github/workflows/docker-cd.yaml b/.github/workflows/docker-cd.yaml
index 5048a8910c..419dc7bfbb 100644
--- a/.github/workflows/docker-cd.yaml
+++ b/.github/workflows/docker-cd.yaml
@@ -73,26 +73,6 @@ jobs:
           echo "XINFERENCE_GIT_TAG=${GIT_TAG}" >> $GITHUB_ENV
         fi

-      - name: Log in to Aliyun Docker Hub
-        uses: docker/login-action@v1
-        with:
-          registry: registry.cn-hangzhou.aliyuncs.com
-          username: ${{ secrets.DOCKERHUB_ALIYUN_USERNAME }}
-          password: ${{ secrets.DOCKERHUB_ALIYUN_PASSWORD }}
-
-      - name: Push docker image to Aliyun
-        shell: bash
-        if: ${{ github.repository == 'xorbitsai/inference' }}
-        env:
-          DOCKER_ORG: registry.cn-hangzhou.aliyuncs.com/xprobe_xinference
-        run: |
-          if [[ -n "$XINFERENCE_GIT_TAG" ]]; then
-            docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}" "$DOCKER_ORG/xinference:latest"
-            docker push "$DOCKER_ORG/xinference:latest"
-            docker tag "xprobe/xinference:${XINFERENCE_GIT_TAG}-cpu" "$DOCKER_ORG/xinference:latest-cpu"
-            docker push "$DOCKER_ORG/xinference:latest-cpu"
-          fi
-
       - name: Clean docker image cache
         shell: bash
         if: ${{ github.repository == 'xorbitsai/inference' }}
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
index 81a16122c5..3c4c9d91cd 100644
--- a/.github/workflows/python.yaml
+++ b/.github/workflows/python.yaml
@@ -74,13 +74,13 @@ jobs:
       fail-fast: false
       matrix:
         os: [ "ubuntu-latest", "macos-12", "windows-latest" ]
-        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
         module: [ "xinference" ]
         exclude:
-          - { os: macos-12, python-version: 3.9 }
           - { os: macos-12, python-version: 3.10 }
-          - { os: windows-latest, python-version: 3.9 }
+          - { os: macos-12, python-version: 3.11 }
           - { os: windows-latest, python-version: 3.10 }
+          - { os: windows-latest, python-version: 3.11 }
         include:
           - { os: self-hosted, module: gpu, python-version: 3.9}
           - { os: macos-latest, module: metal, python-version: "3.10" }
@@ -99,6 +99,12 @@ jobs:
           python-version: ${{ matrix.python-version }}
           activate-environment: ${{ env.CONDA_ENV }}

+      # Important for python == 3.12
+      - name: Update pip and setuptools
+        if: ${{ matrix.python-version == '3.12' }}
+        run: |
+          python -m pip install -U pip setuptools
+
       - name: Install dependencies
         env:
           MODULE: ${{ matrix.module }}
@@ -118,8 +124,7 @@ jobs:
           pip install transformers
           pip install attrdict
           pip install "timm>=0.9.16"
-          pip install torch
-          pip install torchvision
+          pip install torch torchvision
           pip install accelerate
           pip install sentencepiece
           pip install transformers_stream_generator
@@ -133,7 +138,6 @@ jobs:
           pip install -e ".[dev]"
           pip install "jinja2==3.1.2"
           pip install tensorizer
-          pip install eva-decord
           pip install jj-pytorchvideo
           pip install qwen-vl-utils
           pip install datamodel_code_generator
@@ -176,6 +180,9 @@ jobs:
           ${{ env.SELF_HOST_PYTHON }} -m pip uninstall -y "faster_whisper"
           ${{ env.SELF_HOST_PYTHON }} -m pip install -U accelerate
           ${{ env.SELF_HOST_PYTHON }} -m pip install -U verovio
+          ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
+            --disable-warnings \
+            --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/core/tests/test_continuous_batching.py && \
           ${{ env.SELF_HOST_PYTHON }} -m pytest --timeout=1500 \
             -W ignore::PendingDeprecationWarning \
             --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/model/image/tests/test_stable_diffusion.py && \
@@ -207,6 +214,6 @@ jobs:
             --cov-config=setup.cfg --cov-report=xml --cov=xinference xinference/client/tests/test_client.py
           pytest --timeout=1500 \
             -W ignore::PendingDeprecationWarning \
-            --cov-config=setup.cfg --cov-report=xml --cov=xinference --ignore xinference/client/tests/test_client.py --ignore xinference/model/image/tests/test_stable_diffusion.py --ignore xinference/model/image/tests/test_got_ocr2.py --ignore xinference/model/audio/tests xinference
+            --cov-config=setup.cfg --cov-report=xml --cov=xinference --ignore xinference/core/tests/test_continuous_batching.py --ignore xinference/client/tests/test_client.py --ignore xinference/model/image/tests/test_stable_diffusion.py --ignore xinference/model/image/tests/test_got_ocr2.py --ignore xinference/model/audio/tests xinference
         fi
       working-directory: .
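The "Update pip and setuptools" step added above matters because Python 3.12 removed the distutils module (PEP 632) and its ensurepip no longer bundles setuptools, so a fresh 3.12 environment may lack a setuptools new enough to build this project until one is installed explicitly. A pre-flight check in the same spirit, as a minimal sketch (not code from this PR; the >=75 bound mirrors the pyproject.toml pin below):

    import sys

    if sys.version_info >= (3, 12):
        # Python 3.12 dropped distutils (PEP 632) and ensurepip stopped
        # bundling setuptools, hence the explicit upgrade in CI:
        #     python -m pip install -U pip setuptools
        import setuptools

        major = int(setuptools.__version__.split(".")[0])
        assert major >= 75, "setuptools too old to build on Python 3.12"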
diff --git a/pyproject.toml b/pyproject.toml
index 72e171146d..21b135b1d5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,7 @@
 [build-system]
 requires = [
-    "setuptools<64",
+    "setuptools<64; python_version<'3.12'",
+    "setuptools>=75; python_version>='3.12'"
 ]
 build-backend = "setuptools.build_meta"

@@ -17,6 +18,6 @@ extend-exclude = '''
 asyncio_mode = "auto"

 [tool.cibuildwheel]
-build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*"]
-skip = "pp* *musllinux* *i686 cp36* cp310-win32 cp311-win32"
+build = ["cp39-*", "cp310-*", "cp311-*", "cp312-*"]
+skip = "pp* *musllinux* *i686 cp36* cp39-win32 cp310-win32 cp311-win32 cp312-win32"
 manylinux-x86_64-image = "manylinux2014"
diff --git a/setup.cfg b/setup.cfg
index 3c08363e59..7b172a4052 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -7,15 +7,15 @@
 maintainer = Qin Xuye
 maintainer_email = qinxuye@xprobe.io
 license = Apache License 2.0
 url = https://github.com/xorbitsai/inference
-python_requires = >=3.8
+python_requires = >=3.9
 classifier =
     Operating System :: OS Independent
     Programming Language :: Python
     Programming Language :: Python :: 3
-    Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
     Programming Language :: Python :: 3.11
+    Programming Language :: Python :: 3.12
     Programming Language :: Python :: Implementation :: CPython
     Topic :: Software Development :: Libraries
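The two requires entries in [build-system] above use PEP 508 environment markers: the build frontend evaluates each marker against the running interpreter and keeps only the requirement whose marker holds, so exactly one setuptools pin applies to any given build. The selection logic can be reproduced with the packaging library, as a minimal sketch (not code from this PR):

    from packaging.markers import Marker

    for req in ("setuptools<64; python_version<'3.12'",
                "setuptools>=75; python_version>='3.12'"):
        spec, marker = req.split(";", 1)
        # Marker.evaluate() tests a marker against the current interpreter.
        print(spec.strip(), "->", Marker(marker.strip()).evaluate())
    # Exactly one of the two markers evaluates to True on any interpreter.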
diff --git a/xinference/core/tests/test_continuous_batching.py b/xinference/core/tests/test_continuous_batching.py
index c58b91bb55..00e875c743 100644
--- a/xinference/core/tests/test_continuous_batching.py
+++ b/xinference/core/tests/test_continuous_batching.py
@@ -13,7 +13,6 @@
 # limitations under the License.

-import os
 import sys
 import threading
 import time

@@ -112,18 +111,11 @@ def run_internal(self):
         assert result["msg"] == self._expected_res


-@pytest.fixture
-def enable_batch():
-    os.environ["XINFERENCE_TRANSFORMERS_ENABLE_BATCHING"] = "1"
-    yield
-    os.environ["XINFERENCE_TRANSFORMERS_ENABLE_BATCHING"] = "0"
-
-
 @pytest.mark.skipif(
     sys.platform == "win32",
     reason="does not run on windows github CI due to its terrible runtime",
 )
-def test_continuous_batching(enable_batch, setup):
+def test_continuous_batching(setup):
     endpoint, _ = setup
     url = f"{endpoint}/v1/models"
     client = RESTfulClient(endpoint)
@@ -132,7 +124,7 @@ def test_continuous_batching(enable_batch, setup):
     payload = {
         "model_engine": "transformers",
         "model_type": "LLM",
-        "model_name": "qwen1.5-chat",
+        "model_name": "qwen2.5-instruct",
         "quantization": "none",
         "model_format": "pytorch",
         "model_size_in_billions": "0_5",
@@ -146,7 +138,7 @@ def test_continuous_batching(enable_batch, setup):
     response = requests.post(url, json=payload)
     response_data = response.json()
     model_uid_res = response_data["model_uid"]
-    assert model_uid_res == "qwen1.5-chat"
+    assert model_uid_res == "qwen2.5-instruct"

     model = client.get_model(model_uid_res)
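The deleted enable_batch fixture wrote to os.environ directly and always reset XINFERENCE_TRANSFORMERS_ENABLE_BATCHING to "0" on teardown, clobbering any value that was set before the test ran. If such a toggle is ever needed again, pytest's built-in monkeypatch fixture undoes the change automatically; a minimal sketch, assuming the same environment variable (hypothetical, not part of this PR):

    import pytest

    @pytest.fixture
    def enable_batch(monkeypatch):
        # setenv is reverted after the test, restoring the variable's
        # prior value (or absence) instead of forcing it to "0".
        monkeypatch.setenv("XINFERENCE_TRANSFORMERS_ENABLE_BATCHING", "1")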