Skip to content

Commit a86419f

Browse files
authored
Merge pull request #5280 from martin-frbg/zscal_x86_64
kernel/x86_64: fixed cscal and zscal
2 parents 11ff18b + 669c847 commit a86419f

File tree

2 files changed: +101 −41 lines changed (101 additions, 41 deletions)

kernel/x86_64/cscal.c

Lines changed: 52 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -229,10 +229,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
229229

230230
if ( da_i == 0.0 )
231231
{
232-
232+
if (!dummy2) {
233233
while(j < n1)
234234
{
235-
236235
x[i]=0.0;
237236
x[i+1]=0.0;
238237
x[i+inc_x]=0.0;
@@ -244,21 +243,48 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
244243

245244
while(j < n)
246245
{
247-
248246
x[i]=0.0;
249247
x[i+1]=0.0;
250248
i += inc_x ;
251249
j++;
252-
253250
}
251+
} else {
252+
float temp;
253+
while(j < n1)
254+
{
255+
if (isnan(x[i])|| isnan(x[i+1]))
256+
temp=NAN;
257+
else
258+
temp=0.0;
259+
x[i]=temp;
260+
x[i+1]=temp;
261+
if (isnan(x[i+inc_x])|| isnan(x[i+inc_x+1]))
262+
temp=NAN;
263+
else
264+
temp=0.0;
265+
x[i+inc_x]= temp;
266+
x[i+inc_x+1]= temp;
267+
i += 2*inc_x;
268+
j+=2;
254269

270+
}
271+
while(j < n)
272+
{
273+
if (isnan(x[i])|| isnan(x[i+1]))
274+
temp=NAN;
275+
else
276+
temp=0.0;
277+
x[i]=temp;
278+
x[i+1]=temp;
279+
i += inc_x;
280+
j++;
281+
}
282+
}
255283
}
256284
else
257285
{
258-
259286
while(j < n1)
260287
{
261-
262288
if (isnan(x[i]) || isinf(x[i]))
263289
temp0 = NAN;
264290
else
@@ -278,7 +304,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
278304
x[i+inc_x] = temp1;
279305
i += 2*inc_x ;
280306
j+=2;
281-
282307
}
283308

284309
while(j < n)
@@ -305,14 +330,12 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
305330
else
306331
{
307332

308-
309-
if ( da_i == 0.0 )
333+
if ( da_i == 0.0 && dummy2 )
310334
{
311335
BLASLONG n1 = n & -2;
312336

313337
while(j < n1)
314338
{
315-
316339
temp0 = da_r * x[i];
317340
x[i+1] = da_r * x[i+1];
318341
x[i] = temp0;
@@ -367,22 +390,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
367390
return(0);
368391
}
369392

370-
371393
BLASLONG n1 = n & -16;
372394
if ( n1 > 0 )
373395
{
374396

375397
alpha[0] = da_r;
376398
alpha[1] = da_i;
377-
378399
if ( da_r == 0.0 )
379-
if ( da_i == 0 )
400+
if ( da_i == 0 && !dummy2)
380401
cscal_kernel_16_zero(n1 , alpha , x);
381402
else
382-
cscal_kernel_16_zero_r(n1 , alpha , x);
403+
cscal_kernel_16/*_zero_r*/(n1 , alpha , x);
383404
else
384405
cscal_kernel_16(n1 , alpha , x);
385-
386406
i = n1 << 1;
387407
j = n1;
388408
}
@@ -393,6 +413,8 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
393413
{
394414
FLOAT res=0.0;
395415
if (isnan(da_r)) res= da_r;
416+
if (dummy2)
417+
if (isnan(x[i])||isnan(x[i+1])) res= NAN;
396418
while(j < n)
397419
{
398420
x[i]=res;
@@ -415,7 +437,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
415437

416438
} else
417439
{
418-
419440
while(j < n)
420441
{
421442
temp0 = -da_i * x[i+1];
@@ -424,11 +445,10 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
424445
if (!isinf(x[i+1]))
425446
x[i+1] = da_i * x[i];
426447
else x[i+1] = NAN;
427-
if ( x[i] == x[i]) //preserve NaN
448+
if ( !isnan(x[i])) //preserve NaN
428449
x[i] = temp0;
429450
i += 2 ;
430451
j++;
431-
432452
}
433453

434454
}
@@ -439,12 +459,22 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
439459

440460
if ( da_i == 0.0 )
441461
{
442-
443462
while(j < n)
444463
{
445-
464+
446465
temp0 = da_r * x[i];
447-
x[i+1] = da_r * x[i+1];
466+
if (dummy2) {
467+
if (isnan(x[i])||isinf(x[i])) temp0=NAN;
468+
if (isnan(x[i+1])||isinf(x[i+1]))
469+
x[i+1]=NAN;
470+
else
471+
x[i+1] = da_r * x[i+1];
472+
} else {
473+
if (isnan(x[i]))
474+
x[i+1] = NAN;
475+
else
476+
x[i+1] = da_r * x[i+1];
477+
}
448478
x[i] = temp0;
449479
i += 2 ;
450480
j++;
@@ -476,7 +506,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
476506

477507
temp0 = da_r * x[i] - da_i * x[i+1];
478508
x[i+1] = da_r * x[i+1] + da_i * x[i];
479-
x[i] = temp0;
509+
if(!isnan(x[i]))x[i] = temp0;
480510
i += 2 ;
481511
j++;
482512

kernel/x86_64/zscal.c

Lines changed: 49 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -222,13 +222,14 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
222222

223223
if ( da_r == 0.0 )
224224
{
225+
225226
BLASLONG n1 = n & -2;
226227

227228
if ( da_i == 0.0 )
228229
{
230+
if (!dummy2) {
229231
while(j < n1)
230232
{
231-
232233
x[i]=0.0;
233234
x[i+1]=0.0;
234235
x[i+inc_x]=0.0;
@@ -245,9 +246,40 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
245246
x[i+1]=0.0;
246247
i += inc_x ;
247248
j++;
249+
}
250+
} else {
251+
float temp;
252+
while(j < n1)
253+
{
254+
if (isnan(x[i])|| isnan(x[i+1]))
255+
temp=NAN;
256+
else
257+
temp=0.0;
258+
x[i]=temp;
259+
x[i+1]=temp;
260+
if (isnan(x[i+inc_x])|| isnan(x[i+inc_x+1]))
261+
temp=NAN;
262+
else
263+
temp=0.0;
264+
x[i+inc_x]= temp;
265+
x[i+inc_x+1]= temp;
266+
i += 2*inc_x;
267+
j+=2;
248268

249269
}
270+
while(j < n)
271+
{
272+
if (isnan(x[i])|| isnan(x[i+1]))
273+
temp=NAN;
274+
else
275+
temp=0.0;
276+
x[i]=temp;
277+
x[i+1]=temp;
278+
i += inc_x;
279+
j++;
250280

281+
}
282+
}
251283
}
252284
else
253285
{
@@ -260,7 +292,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
260292
temp0 = -da_i * x[i+1];
261293
if (!isinf(x[i+1]))
262294
x[i+1] = da_i * x[i];
263-
else x[i+1] = NAN;
295+
else x[i+1] = NAN;
264296
x[i] = temp0;
265297
if (isnan(x[i+inc_x]) || isinf(x[i+inc_x]))
266298
temp1 = NAN;
@@ -291,16 +323,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
291323

292324
}
293325

294-
295-
296326
}
297327

298328
}
299329
else
300330
{
301331

302-
303-
if ( da_i == 0.0 )
332+
if ( da_i == 0.0 && dummy2)
304333
{
305334
BLASLONG n1 = n & -2;
306335

@@ -370,26 +399,27 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
370399
alpha[1] = da_i;
371400

372401
if ( da_r == 0.0 )
373-
if ( da_i == 0 )
402+
if ( da_i == 0 && !dummy2 )
374403
zscal_kernel_8_zero(n1 , alpha , x);
375404
else
376-
// zscal_kernel_8_zero_r(n1 , alpha , x);
377405
zscal_kernel_8(n1 , alpha , x);
378406
else
379-
if ( da_i == 0 && da_r == da_r)
407+
/* if ( da_i == 0 && da_r == da_r )
380408
zscal_kernel_8_zero_i(n1 , alpha , x);
381-
else
409+
else*/
382410
zscal_kernel_8(n1 , alpha , x);
383-
}
411+
384412
i = n1 << 1;
385413
j = n1;
386-
387-
if ( da_r == 0.0 || da_r != da_r )
414+
}
415+
if ( da_r == 0.0 || isnan(da_r) )
388416
{
389417
if ( da_i == 0.0 )
390418
{
391-
FLOAT res=0.0;
392-
if (da_r != da_r) res= da_r;
419+
FLOAT res=0.0;
420+
if (isnan(da_r)) res= da_r;
421+
if (dummy2)
422+
if (isnan(x[i])||isnan(x[i+1])) res= NAN;
393423
while(j < n)
394424
{
395425
x[i]=res;
@@ -412,7 +442,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
412442

413443
} else
414444
{
415-
416445
while(j < n)
417446
{
418447
temp0 = -da_i * x[i+1];
@@ -421,7 +450,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
421450
if (!isinf(x[i+1]))
422451
x[i+1] = da_i * x[i];
423452
else x[i+1] = NAN;
424-
if ( x[i] == x[i]) //preserve NaN
453+
if ( !isnan(x[i])) //preserve NaN
425454
x[i] = temp0;
426455
i += 2 ;
427456
j++;
@@ -437,8 +466,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
437466
{
438467
while(j < n)
439468
{
440-
441469
temp0 = da_r * x[i];
470+
if (isnan(x[i]))x[i+1]=NAN;
471+
else
442472
x[i+1] = da_r * x[i+1];
443473
x[i] = temp0;
444474
i += 2 ;
@@ -453,7 +483,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
453483
{
454484
temp0 = da_r * x[i] - da_i * x[i+1];
455485
x[i+1] = da_r * x[i+1] + da_i * x[i];
456-
x[i] = temp0;
486+
if(!isnan(x[i]))x[i] = temp0;
457487
i += 2 ;
458488
j++;
459489

0 commit comments

Comments
 (0)