Skip to content

Commit a6966e4

Browse files
committed
Speed up putImageData for RGBA canvases
1 parent fe186e5 commit a6966e4

File tree

3 files changed

+90
-30
lines changed

3 files changed

+90
-30
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ project adheres to [Semantic Versioning](http://semver.org/).
99
==================
1010
### Changed
1111
* Switch CI to GitHub Actions. (Adds Windows and macOS builds.)
12+
* Speed up `putImageData` for RGBA32 canvases.
1213
### Added
1314
* Export `rsvgVersion`.
1415
### Fixed

benchmarks/run.js

+23-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* milliseconds to complete.
55
*/
66

7-
var createCanvas = require('../').createCanvas
7+
var { createCanvas, ImageData } = require('../')
88
var canvas = createCanvas(200, 200)
99
var largeCanvas = createCanvas(1000, 1000)
1010
var ctx = canvas.getContext('2d')
@@ -64,6 +64,28 @@ function done (benchmark, times, start, isAsync) {
6464

6565
// node-canvas
6666

67+
// putImageData benchmarks. Three 200x200 ImageData fixtures are built once,
// outside the timed callbacks, so each benchmark times only the
// ctx.putImageData() call. The fixtures differ only in their byte contents.

// Fixture 1: left exactly as constructed (never written to here). The
// benchmark label below expects every pixel's alpha to be 0.
const id0 = new ImageData(200, 200)
68+
69+
bm('putImageData, all a=0', function () {
70+
ctx.putImageData(id0, 0, 0)
71+
})
72+
73+
// Fixture 2: every byte of the backing array, and therefore every alpha
// byte, is set to 0xFF.
const id255 = new ImageData(200, 200)
74+
id255.data.fill(0xFF)
75+
76+
bm('putImageData, all a=0xFF', function () {
77+
ctx.putImageData(id255, 0, 0)
78+
})
79+
80+
// Fixture 3: each byte is assigned its own 255 * Math.random() value, so
// alpha varies from pixel to pixel.
const idRand = new ImageData(200, 200)
81+
for (let i = 0; i < idRand.data.length; i++) {
82+
idRand.data[i] = 255 * Math.random()
83+
}
84+
85+
bm('putImageData, mixed a', function () {
86+
ctx.putImageData(idRand, 0, 0)
87+
})
88+
6789
bm('fillStyle= name', function () {
6890
ctx.fillStyle = 'transparent'
6991
})

src/CanvasRenderingContext2d.cc

+66-29
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,29 @@
2121

2222
using namespace v8;
2323

24-
// Windows doesn't support the C99 names for these
2524
#ifdef _MSC_VER
26-
#define isnan(x) _isnan(x)
27-
#define isinf(x) (!_finite(x))
25+
// Windows doesn't support the C99 names for these. TODO unnecessary,
26+
// should be using std::isnan.
27+
# define isnan(x) _isnan(x)
28+
# define isinf(x) (!_finite(x))
29+
# include <intrin.h>
30+
# define bswap32 _byteswap_ulong
31+
#else
32+
# ifdef __x86_64__
33+
# include <x86intrin.h>
34+
# endif
35+
# define bswap32 __builtin_bswap32
2836
#endif
2937

38+
// Rotates the 32-bit value `n` right by `c` bit positions and returns the
// result: bits shifted out of the low end re-enter at the high end. `c` is
// reduced modulo the bit width of `n` before it is used.
static inline uint32_t rotr(uint32_t n, unsigned int c) {
39+
// GCC has no portable _rotr intrinsic, so rely on idiom recognition. Works
40+
// for all supported versions of MSVC, GCC x86, GCC ARM, Clang.
41+
// https://stackoverflow.com/a/776523/1218408
42+
// mask is the bit width of `n` minus one (CHAR_BIT bits per byte).
const unsigned int mask = CHAR_BIT * sizeof(n) - 1;
43+
// Keep only the low bits of the count so the right shift stays in range.
c &= mask;
44+
// (~c + 1) is the unsigned negation of c; masking it gives the complementary
// left-shift count and keeps it below the bit width even when c == 0.
return (n >> c) | (n << ((~c + 1) & mask));
45+
}
46+
3047
#ifndef isnan
3148
#define isnan(x) std::isnan(x)
3249
#define isinf(x) std::isinf(x)
@@ -852,32 +869,52 @@ NAN_METHOD(Context2d::PutImageData) {
852869
for (int y = 0; y < rows; ++y) {
853870
uint8_t *dstRow = dst;
854871
uint8_t *srcRow = src;
855-
for (int x = 0; x < cols; ++x) {
856-
// rgba
857-
uint8_t r = *srcRow++;
858-
uint8_t g = *srcRow++;
859-
uint8_t b = *srcRow++;
860-
uint8_t a = *srcRow++;
861-
862-
// argb
863-
// performance optimization: fully transparent/opaque pixels can be
864-
// processed more efficiently.
872+
#if defined(__x86_64__) || defined(_M_X64)
873+
int x = 0;
874+
for (; x < cols - 2; x += 2) {
875+
__m128i px;
876+
memcpy(&px, srcRow, 8); // gcc doesn't define _mm_loadu_si64
877+
px = _mm_unpacklo_epi8(px, _mm_setzero_si128());
878+
// rgba -> bgra
879+
px = _mm_shufflelo_epi16(px, 0b11000110);
880+
px = _mm_shufflehi_epi16(px, 0b11000110);
881+
// broadcast alpha
882+
__m128i av = _mm_shufflelo_epi16(px, 0b11111111);
883+
av = _mm_shufflehi_epi16(av, 0b11111111);
884+
// Set alpha channel to 255 to undo upcoming division by 255
885+
av = _mm_and_si128(av, _mm_setr_epi16(0xFFFF, 0xFFFF, 0xFFFF, 0, 0xFFFF, 0xFFFF, 0xFFFF, 0));
886+
av = _mm_or_si128(av, _mm_setr_epi16(0, 0, 0, 255, 0, 0, 0, 255));
887+
px = _mm_mullo_epi16(px, av);
888+
// divide by 255
889+
px = _mm_mulhi_epu16(px, _mm_set1_epi16(0x8081));
890+
px = _mm_srli_epi16(px, 7);
891+
// pack int16 to int8
892+
px = _mm_packus_epi16(px, px);
893+
memcpy(dstRow, &px, 8);
894+
dstRow += 8;
895+
srcRow += 8;
896+
}
897+
if (x < cols) {
898+
#else
899+
for (int x = 0; x < cols; x++) {
900+
#endif
901+
uint32_t c;
902+
memcpy(&c, srcRow, 4); // rgba (LE)
903+
srcRow += 4;
904+
uint32_t a = c >> 24;
865905
if (a == 0) {
866-
*dstRow++ = 0;
867-
*dstRow++ = 0;
868-
*dstRow++ = 0;
869-
*dstRow++ = 0;
870-
} else if (a == 255) {
871-
*dstRow++ = b;
872-
*dstRow++ = g;
873-
*dstRow++ = r;
874-
*dstRow++ = a;
906+
uint32_t zero = 0;
907+
memcpy(dstRow, &zero, 4);
908+
} else if (a == 255) { // rgba (LE)
909+
c = bswap32(c); // abgr
910+
c = rotr(c, 8); // bgra
911+
memcpy(dstRow, &c, 4);
875912
} else {
876-
float alpha = (float)a / 255;
877-
*dstRow++ = b * alpha;
878-
*dstRow++ = g * alpha;
879-
*dstRow++ = r * alpha;
880-
*dstRow++ = a;
913+
uint8_t r = (c & 0xFF) * a / 255;
914+
uint8_t g = (c >> 8 & 0xFF) * a / 255;
915+
uint8_t b = (c >> 16 & 0xFF) * a / 255;
916+
uint32_t bgra = (a << 24) | (r << 16) | (g << 8) | b;
917+
memcpy(dstRow, &bgra, 4);
881918
}
882919
}
883920
dst += dstStride;
@@ -892,13 +929,13 @@ NAN_METHOD(Context2d::PutImageData) {
892929
uint8_t *dstRow = dst;
893930
uint8_t *srcRow = src;
894931
for (int x = 0; x < cols; ++x) {
895-
// rgba
932+
// rgb[a]
896933
uint8_t r = *srcRow++;
897934
uint8_t g = *srcRow++;
898935
uint8_t b = *srcRow++;
899936
srcRow++;
900937

901-
// argb
938+
// bgra
902939
*dstRow++ = b;
903940
*dstRow++ = g;
904941
*dstRow++ = r;

0 commit comments

Comments
 (0)