@@ -323,15 +323,16 @@ float16_t dequantFuncIQ2_XXS(const in decodeBufIQ2_XXS bl, const in uint blockCo
323
323
const uint8_t qs = bl.block.qs[iqs];
324
324
const uint signscale = pack32(u16vec2(bl16.block.qs[4*ib32+2], bl16.block.qs[4*ib32+3]));
325
325
326
- const float16_t dscale = bl.block.d * 0.25hf * (0.5hf + float16_t (signscale >> 28));
326
+ const float dscale = float( bl.block.d) * 0.25 * (0.5 + float (signscale >> 28));
327
327
uint sign = bitfieldExtract(signscale, 7 * int(ib8), 7);
328
328
sign |= bitCount(sign) << 7;
329
329
330
- const uint8_t g = unpack8(iq2xxs_grid[qs][(idx & 4) >> 2])[idx & 3];
330
+ uint g2 = iq2xxs_grid[qs][(idx & 4) >> 2];
331
+ g2 >>= (idx & 2) * 8;
332
+ const vec2 g = vec2(unpack8(g2));
331
333
332
- float16_t ret = dscale * float16_t(g) * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
333
-
334
- return ret;
334
+ vec2 ret = dscale * g * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
335
+ return float16_t(ret[idx & 1]);
335
336
}
336
337
#endif
337
338
@@ -350,14 +351,16 @@ float16_t dequantFuncIQ2_XS(const in decodeBufIQ2_XS bl, const in uint blockCoor
350
351
const uint iqs = (idx & 0xF8) >> 3; // 0..63
351
352
352
353
const uint16_t qs = bl.block.qs[iqs];
353
- const float16_t dscale = bl.block.d * 0.25hf * (0.5hf + float16_t ((bl.block.scales[is] >> sshift) & 0xF));
354
+ const float dscale = float( bl.block.d) * 0.25 * (0.5 + float ((bl.block.scales[is] >> sshift) & 0xF));
354
355
355
356
uint sign = uint(qs >> 9);
356
357
sign |= bitCount(sign) << 7;
357
- const uint8_t g = unpack8(iq2xs_grid[qs & 0x1FF][(idx & 4) >> 2])[idx & 3];
358
+ uint g2 = iq2xs_grid[qs & 0x1FF][(idx & 4) >> 2];
359
+ g2 >>= (idx & 2) * 8;
360
+ const vec2 g = vec2(unpack8(g2));
358
361
359
- float16_t ret = dscale * float16_t(g) * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
360
- return ret;
362
+ vec2 ret = dscale * g * ((sign & (1 << (idx & 7))) != 0 ? -1.0hf : 1.0hf);
363
+ return float16_t( ret[idx & 1]) ;
361
364
}
362
365
#endif
363
366
@@ -369,24 +372,23 @@ layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ2
369
372
// Dequantizes a single element of an IQ2_S block and returns it as float16_t.
// The element is selected by coordInBlock[1]; blockCoords is not read in the body shown here.
// NOTE(review): this span is a diff hunk. Lines prefixed with '-' are the removed version,
// lines prefixed with '+' are the replacement, and bare integers are old/new line numbers.
// The '-' version halved idx (after saving its low bit in lsb) before deriving indices;
// the '+' version keeps idx unhalved and extracts the same fields with masks and shifts.
// NOTE(review): the 0xE0/0xF8 masks presumably assume idx < 256 (QUANT_K == 256) - confirm.
float16_t dequantFuncIQ2_S(const in decodeBufIQ2_S bl, const in uint blockCoords[2], const in uint coordInBlock[2])
370
373
{
371
374
uint idx = coordInBlock[1];
372
- uint lsb = idx & 1;
373
- idx /= 2;
374
375
375
- const uint ib8 = (idx % 128) / 4; // 0..31
376
- const uint ib32 = ib8 / 4; // 0..7
376
// ib32 is bits 5..7 of idx (group of 32 elements); ib8 is bits 3..7 (group of 8 elements).
+ const uint ib32 = (idx & 0xE0) >> 5; // 0..7
377
+ const uint ib8 = (idx & 0xF8) >> 3; // 0..31
378
// Shift used below to move this 8-element group's two bits of qh into index bits 8..9.
+ const uint qhshift = 2 * (ib8 % 4);
377
379
378
- const uint scale = (bl.block.scales[ib32] >> (2 * (ib8 & 2) )) & 0xf;
380
// Bit 4 of idx picks the low (shift 0) or high (shift 4) nibble of scales[ib32].
+ const uint scale = (bl.block.scales[ib32] >> ((idx & 0x10) >> 2 )) & 0xf;
379
381
const uint qs = bl.block.qs[ib8];
380
382
const uint qh = bl.block.qh[ib32];
381
- const uint qhshift = 2 * (ib8 % 4);
382
- const uint sign = bl.block.qs[QUANT_K / 8 + ib8] >> (2 * (idx % 4));
383
// Sign bits are read from qs at offset QUANT_K / 8 + ib8; shifting by (idx & 6)
// brings the two sign bits of the current element pair down to bits 0 and 1.
+ const uint sign = bl.block.qs[QUANT_K / 8 + ib8] >> (idx & 0x6);
383
384
384
385
const float d = float(bl.block.d);
385
386
const float db = d * 0.25 * (0.5 + scale);
386
- const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(int8_t(sign << 1), int8_t(sign))));
387
- const uint16_t grid = unpack16(iq2s_grid[qs | ((qh << (8 - qhshift)) & 0x300)][(idx & 2) >> 1])[idx & 1];
388
- const vec2 v = db * vec2(sign01) * vec2(unpack8(grid));
389
- return float16_t(v[lsb]);
387
// 1 - (2 & x) yields +1 when bit 1 of x is clear and -1 when it is set:
// component 0 tests bit 0 of sign (moved to bit 1 by the << 1), component 1 tests bit 1.
+ const ivec2 sign01 = 1 - (2 & ivec2(sign << 1, sign));
388
// Grid row index is qs with two extra high bits taken from qh; bit 2 of idx picks the 32-bit word.
+ uint g2 = iq2s_grid[qs | ((qh << (8 - qhshift)) & 0x300)][(idx & 4) >> 2];
389
// Bit 1 of idx shifts by 0 or 16 so the wanted pair of bytes ends up in the low half.
+ g2 >>= (idx & 2) * 8;
390
+ const vec2 v = db * vec2(sign01) * vec2(unpack8(g2));
391
// Bit 0 of idx selects which element of the pair is returned.
+ return float16_t(v[idx & 1]);
390
392
}
391
393
#endif
392
394
@@ -401,28 +403,25 @@ layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3
401
403
402
404
// Dequantizes a single element of an IQ3_XXS block and returns it as float16_t.
// The element is selected by coordInBlock[1]; blockCoords is not read in the body shown here.
// NOTE(review): this span is a diff hunk. Lines prefixed with '-' are the removed version,
// lines prefixed with '+' are the replacement, and bare integers are old/new line numbers.
// The '-' version halved idx (after saving its low bit in lsb) before deriving indices;
// the '+' version keeps idx unhalved and extracts the same fields with masks and shifts.
// NOTE(review): the 0xFC/0xE0 masks presumably assume idx < 256 (QUANT_K == 256) - confirm.
float16_t dequantFuncIQ3_XXS(const in decodeBufIQ3_XXS bl, const in uint blockCoords[2], const in uint coordInBlock[2])
403
405
{
406
// Second view of the same buffer reference, used below to read qs as two 16-bit values
// instead of four 8-bit values. NOTE(review): the packed16 layout is declared elsewhere - confirm.
+ decodeBufIQ3_XXS_packed16 bl16 = decodeBufIQ3_XXS_packed16(bl);
404
407
uint idx = coordInBlock[1];
405
- uint lsb = idx & 1;
406
- idx /= 2;
407
408
408
- const uint iqs = (idx % 128) / 2; // 0..63
409
- const uint is = QUANT_K / 4 + 4 * (iqs / 8); // 8 values
409
// iqs is bits 2..7 of idx, i.e. one qs entry per 4 elements.
+ const uint iqs = (idx & 0xFC) >> 2; // 0..63
410
// (idx & 0xE0) >> 3 equals 4 * (idx >> 5): offset of the 4-byte sign/scale word
// for this group of 32 elements, placed after the first QUANT_K / 4 entries of qs.
+ const uint is = QUANT_K / 4 + ((idx & 0xE0) >> 3); // 8 values
410
411
411
412
const float d = float(bl.block.d);
412
413
const uint qs = bl.block.qs[iqs];
413
- const uint signs = pack32(u8vec4(
414
- bl.block.qs[is+0],
415
- bl.block.qs[is+1],
416
- bl.block.qs[is+2],
417
- bl.block.qs[is+3]
414
+ const uint signs = pack32(u16vec2(
415
+ bl16.block.qs[is/2+0],
416
+ bl16.block.qs[is/2+1]
418
417
));
419
418
// The top 4 bits of the packed word (signs >> 28) act as the scale for this group.
const float db = d * 0.5 * (0.5 + (signs >> 28));
420
419
// Pick one of four 7-bit sign fields from the low 28 bits, chosen by (iqs / 2) % 4.
const uint32_t sign7 = bitfieldExtract(signs, 7 * (int(iqs / 2) % 4), 7);
421
- const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (2 * ( idx % 4) );
422
- const i8vec2 sign01 = i8vec2 (1 - (2 & i8vec2(int8_t( sign << 1), int8_t( sign) )));
423
- const uint grid = iq3xxs_grid[qs] >> (16 * (idx & 1));
420
// Bit 7 receives the low bit of the popcount of the 7 stored bits; shifting by (idx & 6)
// then brings the two sign bits of the current element pair down to bits 0 and 1.
+ const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (idx & 0x6 );
421
// 1 - (2 & x) yields +1 when bit 1 of x is clear and -1 when it is set:
// component 0 tests bit 0 of sign (moved to bit 1 by the << 1), component 1 tests bit 1.
+ const ivec2 sign01 = ivec2 (1 - (2 & ivec2( sign << 1, sign)));
422
// Bit 1 of idx shifts the grid word by 0 or 16 so the wanted pair of bytes is in the low half.
+ const uint grid = iq3xxs_grid[qs] >> (16 * (( idx & 2) >> 1));
424
423
const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy);
425
- return float16_t(v[lsb ]);
424
// Bit 0 of idx selects which element of the pair is returned.
+ return float16_t(v[idx & 1 ]);
426
425
}
427
426
#endif
428
427
@@ -434,23 +433,21 @@ layout(buffer_reference, std430, buffer_reference_align = 2) buffer decodeBufIQ3
434
433
// Dequantizes a single element of an IQ3_S block and returns it as float16_t.
// The element is selected by coordInBlock[1]; blockCoords is not read in the body shown here.
// NOTE(review): this span is a diff hunk. Lines prefixed with '-' are the removed version,
// lines prefixed with '+' are the replacement, and bare integers are old/new line numbers.
// The '-' version halved idx (after saving its low bit in lsb) before deriving indices;
// the '+' version keeps idx unhalved and extracts the same fields with masks and shifts.
// NOTE(review): the 0xFC/0xE0 masks presumably assume idx < 256 (QUANT_K == 256) - confirm.
float16_t dequantFuncIQ3_S(const in decodeBufIQ3_S bl, const in uint blockCoords[2], const in uint coordInBlock[2])
435
434
{
436
435
uint idx = coordInBlock[1];
437
- uint lsb = idx & 1;
438
- idx /= 2;
439
436
440
- const uint iqs = (idx % 128) / 2; // 0..63
441
- const uint iqh = iqs / 8 ;
437
// iqs is bits 2..7 of idx (one qs entry per 4 elements); iqh is bits 5..7 (one qh entry per 32).
+ const uint iqs = (idx & 0xFC) >> 2; // 0..63
438
+ const uint iqh = (idx & 0xE0) >> 5 ;
442
439
443
440
const float d = float(bl.block.d);
444
441
const uint qs = bl.block.qs[iqs];
445
442
const uint qh = bl.block.qh[iqh];
446
- const int8_t sign = int8_t(bl.block.signs[iqs / 2] >> (2 * ( idx % 4) ));
443
// One signs entry per 8 elements; shifting by (idx & 6) brings the two sign bits
// of the current element pair down to bits 0 and 1.
+ const int8_t sign = int8_t(bl.block.signs[iqs / 2] >> (idx & 0x6 ));
447
444
const uint scale = bl.block.scales[iqs / 16];
448
- const i8vec2 sign01 = i8vec2 (1 - (2 & i8vec2 (sign << 1, sign)));
445
// 1 - (2 & x) yields +1 when bit 1 of x is clear and -1 when it is set:
// component 0 tests bit 0 of sign (moved to bit 1 by the << 1), component 1 tests bit 1.
+ const ivec2 sign01 = ivec2 (1 - (2 & ivec2 (sign << 1, sign)));
449
446
// The low bit of iqh picks the low or high nibble of the scale byte; the nibble s maps to 1 + 2 * s.
const float db = d * (1 + 2 * ((scale >> (4 * (iqh & 1))) & 0xf));
450
- const uint32_t grid = iq3s_grid[qs | ((qh << (8 - (iqs % 8))) & 256)] >> (16 * (idx % 2));
447
// Bit (iqs % 8) of qh becomes bit 8 of the grid index; (idx & 2) << 3 shifts the grid
// word by 0 or 16 so the wanted pair of bytes is in the low half.
+ const uint32_t grid = iq3s_grid[qs | ((qh << (8 - (iqs % 8))) & 256)] >> ((idx & 2) << 3 );
451
448
const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy);
452
449
453
- return float16_t(v[lsb ]);
450
// Bit 0 of idx selects which element of the pair is returned.
+ return float16_t(v[idx & 1 ]);
454
451
}
455
452
#endif
456
453
0 commit comments