Skip to content

Commit 8d0faee

Browse files
reorder.c: use void* temp buffer and size-based switch (#7247)
* pointer type changes + replace if with switch * Update src/reorder.c --------- Co-authored-by: Benjamin Schwendinger <[email protected]>
1 parent 55b0de6 commit 8d0faee

File tree

1 file changed

+22
-10
lines changed

1 file changed

+22
-10
lines changed

src/reorder.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,17 @@ SEXP reorder(SEXP x, SEXP order)
6363
// This check is once up front, and then idx is applied to all the columns which is where the most time is spent.
6464
}
6565

66-
char *TMP = R_alloc(nmid, maxSize);
66+
void *TMP = R_alloc(nmid, maxSize);
6767

6868
for (int i=0; i<ncol; ++i) {
6969
const SEXP v = isNewList(x) ? VECTOR_ELT(x,i) : x;
7070
const size_t size = RTYPE_SIZEOF(v); // size_t, otherwise #61 (integer overflow in memcpy)
71-
if (size==4) {
71+
72+
switch (size)
73+
{
74+
case 4: {
7275
const int *restrict vd = DATAPTR_RO(v);
73-
int *restrict tmp = (int *)TMP;
76+
int *restrict tmp = TMP;
7477
#pragma omp parallel for num_threads(getDTthreads(end, true))
7578
for (int i=start; i<=end; ++i) {
7679
tmp[i-start] = vd[idx[i]-1]; // copies 4 bytes; e.g. INTSXP and also SEXP pointers on 32bit (STRSXP and VECSXP)
@@ -79,30 +82,39 @@ SEXP reorder(SEXP x, SEXP order)
7982
// The write to TMP is contiguous, so sync between cpus of written-cache-lines should not be an issue.
8083
// The read from vd is random, but at least the column has a good chance of being all in cache as parallelism is within column.
8184
// As idx approaches being ordered (e.g. moving blocks around) then this should approach read cache-efficiency too.
82-
} else if (size==8) {
85+
break;
86+
}
87+
case 8: {
8388
const double *restrict vd = DATAPTR_RO(v);
84-
double *restrict tmp = (double *)TMP;
89+
double *restrict tmp = TMP;
8590
#pragma omp parallel for num_threads(getDTthreads(end, true))
8691
for (int i=start; i<=end; ++i) {
8792
tmp[i-start] = vd[idx[i]-1]; // copies 8 bytes; e.g. REALSXP and also SEXP pointers on 64bit (STRSXP and VECSXP)
8893
}
89-
} else if (size==16) {
94+
break;
95+
}
96+
case 16: {
9097
const Rcomplex *restrict vd = DATAPTR_RO(v);
91-
Rcomplex *restrict tmp = (Rcomplex *)TMP;
98+
Rcomplex *restrict tmp = TMP;
9299
#pragma omp parallel for num_threads(getDTthreads(end, true))
93100
for (int i=start; i<=end; ++i) {
94101
tmp[i-start] = vd[idx[i]-1];
95102
}
96-
} else { // size 1; checked up front // support raw as column #5100
103+
break;
104+
}
105+
default: { // size 1; checked up front // support raw as column #5100
97106
const Rbyte *restrict vd = DATAPTR_RO(v);
98-
Rbyte *restrict tmp = (Rbyte *)TMP;
107+
Rbyte *restrict tmp = TMP;
99108
#pragma omp parallel for num_threads(getDTthreads(end, true))
100109
for (int i=start; i<=end; ++i) {
101110
tmp[i-start] = vd[idx[i]-1]; // copies 1 bytes; e.g. RAWSXP
102111
}
112+
break;
113+
}
103114
}
115+
104116
// Unique and somber line. Not done lightly. Please read all comments in this file.
105-
memcpy((char *)DATAPTR_RO(v) + size*start, TMP, size*nmid);
117+
memcpy((char*)DATAPTR_RO(v) + size*start, TMP, size*nmid);
106118
// The one and only place in data.table where we write behind the write-barrier. Fundamental to setkey and data.table.
107119
// This file is unique and special w.r.t. the write-barrier: an utterly strict in-place shuffle.
108120
// This shuffle operation does not inc or dec named/refcnt, or anything similar in R: past, present or future.

0 commit comments

Comments
 (0)