Skip to content

Commit a64b75a

Browse files
authored
Merge pull request #5127 from Harishmcw/gesv-threshold
Refined GESV Parallelization Logic for Windows on ARM64
2 parents 453efbd + daf16b8 commit a64b75a

File tree

1 file changed

+19
-7
lines changed

1 file changed

+19
-7
lines changed

interface/lapack/gesv.c

+19 -7
Original file line number | Diff line number | Diff line change
@@ -107,21 +107,33 @@ int NAME(blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, blasint *ipiv,
107107

108108
#ifndef PPC440
109109
buffer = (FLOAT *)blas_memory_alloc(1);
110-
110+
111111
sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
112112
sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
113113
#endif
114114

115115
#ifdef SMP
116116
args.common = NULL;
117-
#ifndef DOUBLE
118-
if (args.m*args.n < 40000)
117+
118+
#if defined(_WIN64) && defined(_M_ARM64)
119+
#ifdef COMPLEX
120+
if (args.m * args.n > 600)
121+
#else
122+
if (args.m * args.n > 1000)
123+
#endif
124+
args.nthreads = num_cpu_avail(4);
125+
else
126+
args.nthreads = 1;
119127
#else
120-
if (args.m*args.n < 10000)
128+
#ifndef DOUBLE
129+
if (args.m * args.n < 40000)
130+
#else
131+
if (args.m * args.n < 10000)
132+
#endif
133+
args.nthreads = 1;
134+
else
135+
args.nthreads = num_cpu_avail(4);
121136
#endif
122-
args.nthreads=1;
123-
else
124-
args.nthreads = num_cpu_avail(4);
125137

126138
if (args.nthreads == 1) {
127139
#endif

0 commit comments

Comments
 (0)