plctlab · joy2myself · May 12, 2025 · Jan 22, 2025 · Apr 11, 2025 · Apr 11, 2025
diff --git a/.github/workflows/linux_qemu.yml b/.github/workflows/linux_qemu.yml
@@ -73,14 +73,6 @@ jobs:
               "(test_kind or test_multiarray or test_simd or test_umath or test_ufunc) and not test_gcd_overflow",
               "s390x"
             ]
-          - [
-              "riscv64",
-              "riscv64-linux-gnu",
-              "riscv64/ubuntu:22.04",
-              "-Dallow-noblas=true",
-              "test_kind or test_multiarray or test_simd or test_umath or test_ufunc",
-              "riscv64"
-            ]
     env:
       TOOLCHAIN_NAME: ${{ matrix.BUILD_PROP[1] }}
       DOCKER_CONTAINER: ${{ matrix.BUILD_PROP[2] }}
@@ -170,7 +162,7 @@ jobs:
           '"
 
 
-  linux_loongarch64_qemu:
+  linux_loongarch64_riscv64_qemu:
     # Only workflow_dispatch is enabled on forks.
     # To enable this job and subsequent jobs on a fork for other events, comment out:
     if: github.repository == 'numpy/numpy' || github.event_name == 'workflow_dispatch'
@@ -267,3 +259,108 @@ jobs:
         /bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
           cd /numpy && spin test -- -k \"${RUNTIME_TEST_FILTER}\"
         '"
+
+
+  linux_riscv64_qemu:
+    # To enable this workflow on a fork, comment out:
+    if: github.repository == 'numpy/numpy'
+    runs-on: ubuntu-24.04
+    continue-on-error: true
+    strategy:
+      fail-fast: false
+      matrix:
+        BUILD_PROP:
+          - [
+              "riscv64",
+              "riscv64-linux-gnu",
+              "riscv64/ubuntu:24.04",
+              "-Dallow-noblas=true",
+              "test_kind or test_multiarray or test_simd or test_umath or test_ufunc",
+              "riscv64"
+            ]
+    env:
+      TOOLCHAIN_NAME: ${{ matrix.BUILD_PROP[1] }}
+      DOCKER_CONTAINER: ${{ matrix.BUILD_PROP[2] }}
+      MESON_OPTIONS: ${{ matrix.BUILD_PROP[3] }}
+      RUNTIME_TEST_FILTER: ${{ matrix.BUILD_PROP[4] }}
+      ARCH: ${{ matrix.BUILD_PROP[5] }}
+      TERM: xterm-256color
+
+    name: "${{ matrix.BUILD_PROP[0] }}"
+    steps:
+    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+      with:
+        submodules: recursive
+        fetch-tags: true
+        persist-credentials: false
+
+    - name: Initialize binfmt_misc for qemu-user-static
+      run: |
+        docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
+
+    - name: Install GCC cross-compilers
+      run: |
+        sudo apt update
+        sudo apt install -y ninja-build gcc-14-${TOOLCHAIN_NAME} g++-14-${TOOLCHAIN_NAME} gfortran-14-${TOOLCHAIN_NAME}
+
+    - name: Cache docker container
+      uses: actions/cache@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0
+      id: container-cache
+      with:
+        path: ~/docker_${{ matrix.BUILD_PROP[1] }}
+        key: container-${{ runner.os }}-${{ matrix.BUILD_PROP[1] }}-${{ matrix.BUILD_PROP[2] }}-${{ hashFiles('requirements/build_requirements.txt') }}
+
+    - name: Creates new container
+      if: steps.container-cache.outputs.cache-hit != 'true'
+      run: |
+        docker run --platform=linux/${ARCH} --name the_container --interactive \
+          -v /:/host -v $(pwd):/numpy ${DOCKER_CONTAINER} /bin/bash -c "
+          apt update &&
+          apt install -y cmake git python3 python-is-python3 python3-dev python3-pip &&
+          mkdir -p /lib64 && ln -s /host/lib64/ld-* /lib64/ &&
+          ln -s /host/lib/x86_64-linux-gnu /lib/x86_64-linux-gnu &&
+          rm -rf /usr/${TOOLCHAIN_NAME} && ln -s /host/usr/${TOOLCHAIN_NAME} /usr/${TOOLCHAIN_NAME} &&
+          rm -rf /usr/lib/gcc/${TOOLCHAIN_NAME} && ln -s /host/usr/lib/gcc-cross/${TOOLCHAIN_NAME} /usr/lib/gcc/${TOOLCHAIN_NAME} &&
+          rm -f /usr/bin/gcc && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-gcc-14 /usr/bin/gcc &&
+          rm -f /usr/bin/g++ && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-g++-14 /usr/bin/g++ &&
+          rm -f /usr/bin/gfortran && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-gfortran-14 /usr/bin/gfortran &&
+          rm -f /usr/bin/ar && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ar /usr/bin/ar &&
+          rm -f /usr/bin/as && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-as /usr/bin/as &&
+          rm -f /usr/bin/ld && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ld /usr/bin/ld &&
+          rm -f /usr/bin/ld.bfd && ln -s /host/usr/bin/${TOOLCHAIN_NAME}-ld.bfd /usr/bin/ld.bfd &&
+          rm -f /usr/bin/ninja && ln -s /host/usr/bin/ninja /usr/bin/ninja &&
+          git config --global --add safe.directory /numpy &&
+          # No need to build ninja from source, the host ninja is used for the build
+          grep -v ninja /numpy/requirements/build_requirements.txt > /tmp/build_requirements.txt &&
+          python -m pip install --break-system-packages -r /tmp/build_requirements.txt &&
+          python -m pip install --break-system-packages pytest pytest-xdist hypothesis typing_extensions pytest-timeout &&
+          rm -f /usr/local/bin/ninja && mkdir -p /usr/local/bin && ln -s /host/usr/bin/ninja /usr/local/bin/ninja
+        "
+        docker commit the_container the_container
+        mkdir -p "${HOME}/docker_${TOOLCHAIN_NAME}"  # not "~/...": bash does not tilde-expand inside quotes
+        docker save -o "${HOME}/docker_${TOOLCHAIN_NAME}/the_container.tar" the_container  # must land in the cached path
+
+    - name: Load container from cache
+      if: steps.container-cache.outputs.cache-hit == 'true'
+      run: docker load -i "${HOME}/docker_${TOOLCHAIN_NAME}/the_container.tar"  # same directory the cache step restores
+
+    - name: Meson Build
+      run: |
+        docker run --rm --platform=linux/${ARCH} -e "TERM=xterm-256color" \
+          -v $(pwd):/numpy -v /:/host the_container \
+          /bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
+            cd /numpy && spin build --clean -- ${MESON_OPTIONS}
+          '"
+
+    - name: Meson Log
+      if: always()
+      run: 'cat build/meson-logs/meson-log.txt'
+
+    - name: Run Tests
+      run: |
+        docker run --rm --platform=linux/${ARCH} -e "TERM=xterm-256color" \
+          -v $(pwd):/numpy -v /:/host the_container \
+          /bin/script -e -q -c "/bin/bash --noprofile --norc -eo pipefail -c '
+            export F90=/usr/bin/gfortran
+            cd /numpy && spin test -- --timeout=600 --durations=10 -k \"${RUNTIME_TEST_FILTER}\"
+          '"
diff --git a/meson.options b/meson.options
@@ -35,7 +35,7 @@ option('test-simd', type: 'array',
           'VSX', 'VSX2', 'VSX3', 'VSX4',
           'NEON', 'ASIMD',
           'VX', 'VXE', 'VXE2',
-          'LSX',
+          'LSX', 'RVV',
         ],
         description: 'Specify a list of CPU features to be tested against NumPy SIMD interface')
 option('test-simd-args', type: 'string', value: '',

diff --git a/meson_cpu/meson.build b/meson_cpu/meson.build
@@ -97,7 +97,7 @@ min_features = {
   's390x': [],
   'arm': [],
   'aarch64': [ASIMD],
-  'riscv64': [],
+  'riscv64': [RVV],
   'wasm32': [],
   'loongarch64': [LSX],
 }.get(cpu_family, [])

diff --git a/numpy/_core/meson.build b/numpy/_core/meson.build
@@ -103,6 +103,10 @@ if host_machine.cpu_family() == 'loongarch64'
   add_project_arguments(['-DHWY_COMPILE_ONLY_SCALAR'], language: ['cpp'])
 endif
 
+if host_machine.cpu_family() == 'riscv64'  # build for fixed 256-bit RVV; 'zvl' takes VLEN from -march and is accepted by both GCC and Clang
+  add_project_arguments('-march=rv64gcv_zvl256b', '-mrvv-vector-bits=zvl', language: ['c','cpp'])
+endif
+
 use_highway = not get_option('disable-highway')
 if use_highway and not fs.exists('src/highway/README.md')
   error('Missing the `highway` git submodule! Run `git submodule update --init` to fix this.')
@@ -750,6 +754,7 @@ _umath_tests_mtargets = mod_features.multi_targets(
     ASIMDHP, ASIMD, NEON,
     VSX3, VSX2, VSX,
     VXE, VX,
+    RVV,
   ],
   baseline: CPU_BASELINE,
   prefix: 'NPY_',
@@ -794,7 +799,8 @@ foreach gen_mtargets : [
       AVX512_SKX, AVX2, XOP, SSE42, SSE2,
       VSX2,
       ASIMD, NEON,
-      VXE, VX
+      VXE, VX,
+      RVV,
     ]
   ],
 ]
@@ -897,6 +903,7 @@ foreach gen_mtargets : [
       VSX3, VSX2,
       VXE, VX,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -908,6 +915,7 @@ foreach gen_mtargets : [
       VSX4, VSX2,
       VX,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -919,6 +927,7 @@ foreach gen_mtargets : [
       NEON,
       VXE, VX,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -937,6 +946,7 @@ foreach gen_mtargets : [
       NEON_VFPV4,
       VXE,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -960,6 +970,7 @@ foreach gen_mtargets : [
       VSX2,
       VXE, VX,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -978,6 +989,7 @@ foreach gen_mtargets : [
       NEON_VFPV4,
       VXE2, VXE,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -994,6 +1006,7 @@ foreach gen_mtargets : [
       VSX2,
       VXE, VX,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -1005,6 +1018,7 @@ foreach gen_mtargets : [
       ASIMD, NEON,
       VXE, VX,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -1015,6 +1029,7 @@ foreach gen_mtargets : [
       VSX2,
       ASIMD, NEON,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -1026,6 +1041,7 @@ foreach gen_mtargets : [
       VSX3, VSX2,
       VXE, VX,
       LSX,
+      RVV,
     ]
   ],
   [
@@ -1037,6 +1053,7 @@ foreach gen_mtargets : [
       VSX2,
       VX,
       LSX,
+      RVV,
     ]
   ],
 ]

diff --git a/numpy/_core/src/common/simd/intdiv.h b/numpy/_core/src/common/simd/intdiv.h
@@ -220,6 +220,10 @@ NPY_FINLINE npyv_u8x3 npyv_divisor_u8(npy_uint8 d)
     divisor.val[0] = npyv_setall_u8(m);
     divisor.val[1] = npyv_setall_u8(sh1);
     divisor.val[2] = npyv_setall_u8(sh2);
+#elif defined(NPY_HAVE_RVV)
+    divisor.val[0] = npyv_setall_u8(m);
+    divisor.val[1] = npyv_setall_u8(sh1);
+    divisor.val[2] = npyv_setall_u8(sh2);
 #else
     #error "please initialize the shifting operand for the new architecture"
 #endif
@@ -253,7 +257,7 @@ NPY_FINLINE npyv_s8x3 npyv_divisor_s8(npy_int8 d)
     npyv_s8x3 divisor;
     divisor.val[0] = npyv_setall_s8(m);
     divisor.val[2] = npyv_setall_s8(d < 0 ? -1 : 0);
-    #if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_LSX)
+    #if defined(NPY_HAVE_VSX2) || defined(NPY_HAVE_VX) || defined(NPY_HAVE_LSX) || defined(NPY_HAVE_RVV)
         divisor.val[1] = npyv_setall_s8(sh);
     #elif defined(NPY_HAVE_NEON)
         divisor.val[1] = npyv_setall_s8(-sh);
@@ -298,6 +302,9 @@ NPY_FINLINE npyv_u16x3 npyv_divisor_u16(npy_uint16 d)
 #elif defined(NPY_HAVE_LSX)
     divisor.val[1] = npyv_setall_u16(sh1);
     divisor.val[2] = npyv_setall_u16(sh2);
+#elif defined(NPY_HAVE_RVV)
+    divisor.val[1] = npyv_setall_u16(sh1);
+    divisor.val[2] = npyv_setall_u16(sh2);
 #else
     #error "please initialize the shifting operand for the new architecture"
 #endif
@@ -330,6 +337,8 @@ NPY_FINLINE npyv_s16x3 npyv_divisor_s16(npy_int16 d)
     divisor.val[1] = npyv_setall_s16(-sh);
 #elif defined(NPY_HAVE_LSX)
     divisor.val[1] = npyv_setall_s16(sh);
+#elif defined(NPY_HAVE_RVV)
+    divisor.val[1] = npyv_setall_s16(sh);
 #else
     #error "please initialize the shifting operand for the new architecture"
 #endif
@@ -370,6 +379,9 @@ NPY_FINLINE npyv_u32x3 npyv_divisor_u32(npy_uint32 d)
 #elif defined(NPY_HAVE_LSX)
     divisor.val[1] = npyv_setall_u32(sh1);
     divisor.val[2] = npyv_setall_u32(sh2);
+#elif defined(NPY_HAVE_RVV)
+    divisor.val[1] = npyv_setall_u32(sh1);
+    divisor.val[2] = npyv_setall_u32(sh2);
 #else
     #error "please initialize the shifting operand for the new architecture"
 #endif
@@ -407,6 +419,8 @@ NPY_FINLINE npyv_s32x3 npyv_divisor_s32(npy_int32 d)
     divisor.val[1] = npyv_setall_s32(-sh);
 #elif defined(NPY_HAVE_LSX)
     divisor.val[1] = npyv_setall_s32(sh);
+#elif defined(NPY_HAVE_RVV)
+    divisor.val[1] = npyv_setall_s32(sh);
 #else
     #error "please initialize the shifting operand for the new architecture"
 #endif
@@ -444,6 +458,9 @@ NPY_FINLINE npyv_u64x3 npyv_divisor_u64(npy_uint64 d)
     #elif defined(NPY_HAVE_LSX)
         divisor.val[1] = npyv_setall_u64(sh1);
         divisor.val[2] = npyv_setall_u64(sh2);
+    #elif defined(NPY_HAVE_RVV)
+        divisor.val[1] = npyv_setall_u64(sh1);
+        divisor.val[2] = npyv_setall_u64(sh2);
     #else
         #error "please initialize the shifting operand for the new architecture"
     #endif
@@ -484,6 +501,8 @@ NPY_FINLINE npyv_s64x3 npyv_divisor_s64(npy_int64 d)
     divisor.val[1] = npyv_set_s64(sh);
     #elif defined(NPY_HAVE_LSX)
     divisor.val[1] = npyv_setall_s64(sh);
+    #elif defined(NPY_HAVE_RVV)
+    divisor.val[1] = npyv_setall_s64(sh);
     #else
         #error "please initialize the shifting operand for the new architecture"
     #endif