@@ -1402,59 +1402,76 @@ void mulInternal(BigDigit[] result, const(BigDigit)[] x, const(BigDigit)[] y)
1402
1402
auto extra = x.length % y.length;
1403
1403
auto maxchunk = chunksize + extra;
1404
1404
bool paddingY; // true = we're padding Y, false = we're padding X.
1405
- if ( extra * extra * 2 < y.length* y.length)
1406
- {
1407
- // The leftover bit is small enough that it should be incorporated
1408
- // in the existing chunks.
1409
- // Make all the chunks a tiny bit bigger
1410
- // (We're padding y with zeros)
1411
- chunksize += extra / numchunks ;
1412
- extra = x .length - chunksize * numchunks ;
1413
- // there will probably be a few left over.
1414
- // Every chunk will either have size chunksize, or chunksize+1.
1415
- maxchunk = chunksize + 1 ;
1416
- paddingY = true ;
1417
- assert (chunksize + extra + chunksize * (numchunks - 1 ) == x.length );
1405
+ bool isExtraSmall = extra * extra * 2 < y.length * y.length;
1406
+ if (numchunks == 1 && isExtraSmall)
1407
+ {
1408
+ // We divide (x_first_half * y) and (x_last_half * y)
1409
+ // between 1.414:1 and 1.707:1 (1.707 = 1+1/sqrt(2)).
1410
+ // Each half of x is then (0.707 ~ 0.854):1 relative to y, i.e. (1.414 ~ 1.707)/2:1, which is roughly balanced.
1411
+ BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(y.length) + y.length] ;
1412
+ BigDigit [] partial = scratchbuff[$ - y .length .. $] ;
1413
+ mulKaratsuba(result[ 0 .. half + y.length], y, x[ 0 .. half], scratchbuff);
1414
+ partial[] = result[half .. half + y.length];
1415
+ mulKaratsuba(result[half .. $], y, x[half .. $], scratchbuff) ;
1416
+ addAssignSimple(result[half .. half + y.length], partial) ;
1417
+ () @trusted { GC .free(scratchbuff.ptr); } ( );
1418
1418
}
1419
1419
else
1420
1420
{
1421
- // the extra bit is large enough that it's worth making a new chunk.
1422
- // (This means we're padding x with zeros, when doing the first one).
1423
- maxchunk = chunksize;
1424
- ++ numchunks;
1425
- paddingY = false ;
1426
- assert (extra + chunksize * (numchunks- 1 ) == x.length );
1427
- }
1428
- // We make the buffer a bit bigger so we have space for the partial sums.
1429
- BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1430
- BigDigit [] partial = scratchbuff[$ - y.length .. $];
1431
- size_t done; // how much of X have we done so far?
1432
- if (paddingY)
1433
- {
1434
- // If the first chunk is bigger, do it first. We're padding y.
1435
- mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1436
- x[0 .. chunksize + (extra> 0 ?1 :0 )], y, scratchbuff);
1437
- done = chunksize + (extra > 0 ? 1 : 0 );
1438
- if (extra) -- extra;
1439
- }
1440
- else
1441
- { // We're padding X. Begin with the extra bit.
1442
- mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1443
- done = extra;
1444
- extra = 0 ;
1445
- }
1446
- immutable basechunksize = chunksize;
1447
- while (done < x.length)
1448
- {
1449
- chunksize = basechunksize + (extra > 0 ? 1 : 0 );
1450
- if (extra) -- extra;
1451
- partial[] = result[done .. done+ y.length];
1452
- mulKaratsuba(result[done .. done + y.length + chunksize],
1453
- x[done .. done+ chunksize], y, scratchbuff);
1454
- addAssignSimple(result[done .. done + y.length + chunksize], partial);
1455
- done += chunksize;
1421
+ if (isExtraSmall)
1422
+ {
1423
+ // The leftover bit is small enough that it should be incorporated
1424
+ // in the existing chunks.
1425
+ // Make all the chunks a tiny bit bigger
1426
+ // (We're padding y with zeros)
1427
+ chunksize += extra / numchunks;
1428
+ extra = x.length - chunksize* numchunks;
1429
+ // there will probably be a few left over.
1430
+ // Every chunk will either have size chunksize, or chunksize+1.
1431
+ maxchunk = chunksize + 1 ;
1432
+ paddingY = true ;
1433
+ assert (chunksize + extra + chunksize * (numchunks- 1 ) == x.length );
1434
+ }
1435
+ else
1436
+ {
1437
+ // the extra bit is large enough that it's worth making a new chunk.
1438
+ // (This means we're padding x with zeros, when doing the first one).
1439
+ maxchunk = chunksize;
1440
+ ++ numchunks;
1441
+ paddingY = false ;
1442
+ assert (extra + chunksize * (numchunks- 1 ) == x.length );
1443
+ }
1444
+ // We make the buffer a bit bigger so we have space for the partial sums.
1445
+ BigDigit [] scratchbuff = new BigDigit[karatsubaRequiredBuffSize(maxchunk) + y.length];
1446
+ BigDigit [] partial = scratchbuff[$ - y.length .. $];
1447
+ size_t done; // how much of X have we done so far?
1448
+ if (paddingY)
1449
+ {
1450
+ // If the first chunk is bigger, do it first. We're padding y.
1451
+ mulKaratsuba(result[0 .. y.length + chunksize + (extra > 0 ? 1 : 0 )],
1452
+ x[0 .. chunksize + (extra> 0 ?1 :0 )], y, scratchbuff);
1453
+ done = chunksize + (extra > 0 ? 1 : 0 );
1454
+ if (extra) -- extra;
1455
+ }
1456
+ else
1457
+ { // We're padding X. Begin with the extra bit.
1458
+ mulKaratsuba(result[0 .. y.length + extra], y, x[0 .. extra], scratchbuff);
1459
+ done = extra;
1460
+ extra = 0 ;
1461
+ }
1462
+ immutable basechunksize = chunksize;
1463
+ while (done < x.length)
1464
+ {
1465
+ chunksize = basechunksize + (extra > 0 ? 1 : 0 );
1466
+ if (extra) -- extra;
1467
+ partial[] = result[done .. done+ y.length];
1468
+ mulKaratsuba(result[done .. done + y.length + chunksize],
1469
+ x[done .. done+ chunksize], y, scratchbuff);
1470
+ addAssignSimple(result[done .. done + y.length + chunksize], partial);
1471
+ done += chunksize;
1472
+ }
1473
+ () @trusted { GC .free(scratchbuff.ptr); } ();
1456
1474
}
1457
- () @trusted { GC .free(scratchbuff.ptr); } ();
1458
1475
}
1459
1476
else
1460
1477
{
@@ -1963,7 +1980,7 @@ bool less(const(BigDigit)[] x, const(BigDigit)[] y) pure nothrow
1963
1980
bool inplaceSub (BigDigit[] result, const (BigDigit)[] x, const (BigDigit)[] y)
1964
1981
pure nothrow
1965
1982
{
1966
- assert (result.length == (x.length >= y.length) ? x.length : y.length);
1983
+ assert (result.length == (( x.length >= y.length) ? x.length : y.length) );
1967
1984
1968
1985
size_t minlen;
1969
1986
bool negative;
@@ -2021,7 +2038,7 @@ void mulKaratsuba(BigDigit [] result, const(BigDigit) [] x,
2021
2038
const (BigDigit)[] y, BigDigit [] scratchbuff) pure nothrow
2022
2039
{
2023
2040
assert (x.length >= y.length);
2024
- assert (result.length < uint .max, " Operands too large" );
2041
+ assert (result.length < uint .max, " Operands too large" );
2025
2042
assert (result.length == x.length + y.length);
2026
2043
if (x.length <= KARATSUBALIMIT )
2027
2044
{
0 commit comments