@@ -63,14 +63,17 @@ SEXP reorder(SEXP x, SEXP order)
6363 // This check is once up front, and then idx is applied to all the columns which is where the most time is spent.
6464 }
6565
66- char * TMP = R_alloc (nmid , maxSize );
66+ void * TMP = R_alloc (nmid , maxSize );
6767
6868 for (int i = 0 ; i < ncol ; ++ i ) {
6969 const SEXP v = isNewList (x ) ? VECTOR_ELT (x ,i ) : x ;
7070 const size_t size = RTYPE_SIZEOF (v ); // size_t, otherwise #61 (integer overflow in memcpy)
71- if (size == 4 ) {
71+
72+ switch (size )
73+ {
74+ case 4 : {
7275 const int * restrict vd = DATAPTR_RO (v );
73- int * restrict tmp = ( int * ) TMP ;
76+ int * restrict tmp = TMP ;
7477 #pragma omp parallel for num_threads(getDTthreads(end, true))
7578 for (int i = start ; i <=end ; ++ i ) {
7679 tmp [i - start ] = vd [idx [i ]- 1 ]; // copies 4 bytes; e.g. INTSXP and also SEXP pointers on 32bit (STRSXP and VECSXP)
@@ -79,30 +82,39 @@ SEXP reorder(SEXP x, SEXP order)
7982 // The write to TMP is contiguous, so sync between cpus of written-cache-lines should not be an issue.
8083 // The read from vd is random, but at least the column has a good chance of being all in cache as parallelism is within column.
8184 // As idx approaches being ordered (e.g. moving blocks around) then this should approach read cache-efficiency too.
82- } else if (size == 8 ) {
85+ break ;
86+ }
87+ case 8 : {
8388 const double * restrict vd = DATAPTR_RO (v );
84- double * restrict tmp = ( double * ) TMP ;
89+ double * restrict tmp = TMP ;
8590 #pragma omp parallel for num_threads(getDTthreads(end, true))
8691 for (int i = start ; i <=end ; ++ i ) {
8792 tmp [i - start ] = vd [idx [i ]- 1 ]; // copies 8 bytes; e.g. REALSXP and also SEXP pointers on 64bit (STRSXP and VECSXP)
8893 }
89- } else if (size == 16 ) {
94+ break ;
95+ }
96+ case 16 : {
9097 const Rcomplex * restrict vd = DATAPTR_RO (v );
91- Rcomplex * restrict tmp = ( Rcomplex * ) TMP ;
98+ Rcomplex * restrict tmp = TMP ;
9299 #pragma omp parallel for num_threads(getDTthreads(end, true))
93100 for (int i = start ; i <=end ; ++ i ) {
94101 tmp [i - start ] = vd [idx [i ]- 1 ];
95102 }
96- } else { // size 1; checked up front // support raw as column #5100
103+ break ;
104+ }
105+ default : { // size 1; checked up front // support raw as column #5100
97106 const Rbyte * restrict vd = DATAPTR_RO (v );
98- Rbyte * restrict tmp = ( Rbyte * ) TMP ;
107+ Rbyte * restrict tmp = TMP ;
99108 #pragma omp parallel for num_threads(getDTthreads(end, true))
100109 for (int i = start ; i <=end ; ++ i ) {
101110 tmp [i - start ] = vd [idx [i ]- 1 ]; // copies 1 bytes; e.g. RAWSXP
102111 }
112+ break ;
113+ }
103114 }
115+
104116 // Unique and somber line. Not done lightly. Please read all comments in this file.
105- memcpy ((char * )DATAPTR_RO (v ) + size * start , TMP , size * nmid );
117+ memcpy ((char * )DATAPTR_RO (v ) + size * start , TMP , size * nmid );
106118 // The one and only place in data.table where we write behind the write-barrier. Fundamental to setkey and data.table.
107119 // This file is unique and special w.r.t. the write-barrier: an utterly strict in-place shuffle.
108120 // This shuffle operation does not inc or dec named/refcnt, or anything similar in R: past, present or future.
0 commit comments