@@ -134,6 +134,13 @@ typedef struct _bigval_t {
         size_t sz;
         uintptr_t age : 2;
     };
+#ifdef _P64 // Add padding so that char data[] below is 64-byte aligned
+    // (8 pointers of 8 bytes each) - (4 other pointers in struct)
+    void *_padding[8 - 4];
+#else
+    // (16 pointers of 4 bytes each) - (4 other pointers in struct)
+    void *_padding[16 - 4];
+#endif
     //struct buff_t <>;
     union {
         uintptr_t header;
@@ -145,7 +152,7 @@ typedef struct _bigval_t {
 #if !defined(_COMPILER_MICROSOFT_)
     int _dummy[0];
 #endif
-    // must be 16-aligned here, in 32 & 64b
+    // must be 64-byte aligned here, in 32 & 64 bit modes
     char data[];
 } bigval_t;
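The padding sizes above assume exactly four pointer-sized fields ahead of `_padding`, so that the struct header totals 64 bytes on both word sizes. A minimal sketch of how to verify that `data[]` then starts on a 64-byte boundary, using a hypothetical mock of the layout rather than the real Julia headers:

```c
#include <stddef.h>

// Hypothetical stand-in for bigval_t: two list pointers, a size field,
// and one union slot give 4 pointer-sized fields before the padding.
typedef struct mock_bigval {
    struct mock_bigval *next;
    struct mock_bigval **prev;
    size_t sz;
    void *header;
    void *_padding[sizeof(void*) == 8 ? 8 - 4 : 16 - 4];
    char data[];
} mock_bigval_t;

// 4 fields + padding = 64 bytes on both 32- and 64-bit targets, so data
// lands on a 64-byte boundary (given a 64-byte-aligned allocation base).
_Static_assert(offsetof(mock_bigval_t, data) % 64 == 0,
               "data[] is not 64-byte aligned");
```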
@@ -170,7 +177,7 @@ typedef struct _pool_t {

 #define GC_PAGE_LG2 14 // log2(size of a page)
 #define GC_PAGE_SZ (1 << GC_PAGE_LG2) // 16k
-#define GC_PAGE_OFFSET (16 - (sizeof_jl_taggedvalue_t % 16))
+#define GC_PAGE_OFFSET (JL_SMALL_BYTE_ALIGNMENT - (sizeof_jl_taggedvalue_t % JL_SMALL_BYTE_ALIGNMENT))

 // pool page metadata
 typedef struct _gcpage_t {
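`GC_PAGE_OFFSET` places the first tagged value slightly inside the page so that object payloads, which begin one tag past the slot, come out aligned. A quick check of the arithmetic, assuming a 16-byte `JL_SMALL_BYTE_ALIGNMENT` and an 8-byte tag (both values are assumptions here, not read from the headers):

```c
#include <stdio.h>

#define JL_SMALL_BYTE_ALIGNMENT 16        // assumed value
#define SIZEOF_TAG sizeof(void*)          // assumed 8-byte tag header

#define GC_PAGE_OFFSET \
    (JL_SMALL_BYTE_ALIGNMENT - (SIZEOF_TAG % JL_SMALL_BYTE_ALIGNMENT))

int main(void)
{
    // offset (8) + tag (8) = 16: the first object's payload starts
    // exactly on a 16-byte boundary within the page.
    printf("GC_PAGE_OFFSET = %zu\n", (size_t)GC_PAGE_OFFSET);
    return 0;
}
```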
@@ -440,15 +447,8 @@ static int jl_gc_finalizers_inhibited; // don't run finalizers during codegen #1

 // malloc wrappers, aligned allocation

-#if defined(_P64) || defined(__APPLE__)
-#define malloc_a16(sz) malloc(sz)
-#define realloc_a16(p, sz, oldsz) realloc((p), (sz))
-#define free_a16(p) free(p)
-#else
-#define malloc_a16(sz) jl_malloc_aligned(sz, 16)
-#define realloc_a16(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, 16)
-#define free_a16(p) jl_free_aligned(p)
-#endif
+#define malloc_cache_align(sz) jl_malloc_aligned(sz, JL_CACHE_BYTE_ALIGNMENT)
+#define realloc_cache_align(p, sz, oldsz) jl_realloc_aligned(p, sz, oldsz, JL_CACHE_BYTE_ALIGNMENT)

 static void schedule_finalization(void *o, void *f)
 {
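The deleted `*_a16` macros could fall back to plain `malloc` on `_P64` and macOS because those platforms already guarantee 16-byte alignment; no platform guarantees 64 bytes, so the cache-aligned replacements must always route through the aligned allocator. A sketch of what an aligned wrapper can look like on POSIX, making no assumption about Julia's actual `jl_malloc_aligned`:

```c
#include <stdlib.h>

// posix_memalign requires align to be a power of two and a multiple
// of sizeof(void*); the returned block is released with plain free().
static void *xmalloc_aligned(size_t sz, size_t align)
{
    void *p = NULL;
    if (posix_memalign(&p, align, sz) != 0)
        return NULL;
    return p;
}
```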
@@ -973,10 +973,10 @@ static NOINLINE void *alloc_big(size_t sz)
 {
     maybe_collect();
     size_t offs = offsetof(bigval_t, header);
-    size_t allocsz = LLT_ALIGN(sz + offs, 16);
+    size_t allocsz = LLT_ALIGN(sz + offs, JL_CACHE_BYTE_ALIGNMENT);
     if (allocsz < sz) // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
-    bigval_t *v = (bigval_t*)malloc_a16(allocsz);
+    bigval_t *v = (bigval_t*)malloc_cache_align(allocsz);
     if (v == NULL)
         jl_throw(jl_memory_exception);
     jl_atomic_fetch_add(&allocd_bytes, allocsz);
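`LLT_ALIGN` only rounds sizes up, so the result can come out smaller than the request only if `sz + offs` wrapped around `size_t`; that is exactly what the `allocsz < sz` guard catches. A standalone demonstration of the idiom (the macro body here is the common power-of-two rounding, assumed equivalent to Julia's):

```c
#include <stdint.h>
#include <stdio.h>

// Round x up to a multiple of a (a must be a power of two).
#define LLT_ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
    size_t sz = SIZE_MAX - 8;                  // pathological request
    size_t allocsz = LLT_ALIGN(sz + 64, 64);   // the addition wraps
    printf("overflow detected: %d\n", allocsz < sz); // prints 1
    return 0;
}
```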
@@ -1036,7 +1036,7 @@ static bigval_t **sweep_big_list(int sweep_mask, bigval_t **pv)
 #ifdef MEMDEBUG
             memset(v, 0xbb, v->sz & ~3);
 #endif
-            free_a16(v);
+            jl_free_aligned(v);
             big_freed++;
         }
         big_total++;
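The `& ~3` mask reflects the union at the top of this diff: the low two bits of `sz` overlay the 2-bit `age` field, so the sweep strips them before treating the value as a byte count. A toy illustration of the packing (the convention is inferred from the union, not stated explicitly in the source):

```c
#include <stddef.h>
#include <stdio.h>

int main(void)
{
    size_t sz = 512 | 2;   // 512-byte object whose age bits are set to 2
    printf("size = %zu\n", sz & ~(size_t)3); // 512
    printf("age  = %zu\n", sz & 3);          // 2
    return 0;
}
```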
@@ -1103,7 +1103,7 @@ static void jl_gc_free_array(jl_array_t *a)
     if (a->flags.how == 2) {
         char *d = (char*)a->data - a->offset * a->elsize;
         if (a->flags.isaligned)
-            free_a16(d);
+            jl_free_aligned(d);
         else
             free(d);
         freed_bytes += array_nbytes(a);
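An array's `data` pointer may sit past the start of its buffer when elements have been removed from the front, so the free path rewinds by `offset * elsize` to recover the address that was actually allocated. The pointer arithmetic in isolation (the struct fields mocked by plain locals):

```c
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    size_t elsize = 8, offset = 3;       // 3 elements trimmed at the front
    char *buf = malloc(16 * elsize);     // the real allocation
    char *data = buf + offset * elsize;  // what a->data would point at
    char *d = data - offset * elsize;    // rewind, as in jl_gc_free_array
    printf("recovered allocation start: %d\n", d == buf); // 1
    free(buf);
    return 0;
}
```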
@@ -2397,7 +2397,7 @@ void *reallocb(void *b, size_t sz)
     if (allocsz < sz) // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
     bigval_t *bv = bigval_header(buff);
-    bv = (bigval_t*)realloc_a16(bv, allocsz, bv->sz&~3);
+    bv = (bigval_t*)realloc_cache_align(bv, allocsz, bv->sz&~3);
     if (bv == NULL)
         jl_throw(jl_memory_exception);
     return &bv->data[0];
@@ -2436,7 +2436,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)

 JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
 {
-    const int sz = LLT_ALIGN(sizeof_jl_taggedvalue_t + sizeof(void*), 16);
+    const int sz = LLT_ALIGN(sizeof_jl_taggedvalue_t + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
     void *tag = NULL;
 #ifdef MEMDEBUG
     tag = alloc_big(sz);
@@ -2449,7 +2449,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)

 JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
 {
-    const int sz = LLT_ALIGN(sizeof_jl_taggedvalue_t + sizeof(void*) * 2, 16);
+    const int sz = LLT_ALIGN(sizeof_jl_taggedvalue_t + sizeof(void*) * 2, JL_SMALL_BYTE_ALIGNMENT);
     void *tag = NULL;
 #ifdef MEMDEBUG
     tag = alloc_big(sz);
@@ -2462,7 +2462,7 @@ JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)

 JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void)
 {
-    const int sz = LLT_ALIGN(sizeof_jl_taggedvalue_t + sizeof(void*) * 3, 16);
+    const int sz = LLT_ALIGN(sizeof_jl_taggedvalue_t + sizeof(void*) * 3, JL_SMALL_BYTE_ALIGNMENT);
     void *tag = NULL;
 #ifdef MEMDEBUG
     tag = alloc_big(sz);
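Each of these fast paths rounds the tag plus one, two, or three pointer-sized words up to the small allocation alignment, so some of them collapse onto the same pool size class. Checking the arithmetic under the assumption of an 8-byte tag and a 16-byte `JL_SMALL_BYTE_ALIGNMENT` (both assumed values):

```c
#include <stdio.h>

#define LLT_ALIGN(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))
#define JL_SMALL_BYTE_ALIGNMENT 16   // assumed value
#define SIZEOF_TAG sizeof(void*)     // assumed 8-byte tag

int main(void)
{
    for (int words = 1; words <= 3; words++)
        printf("alloc_%dw -> %zu bytes\n", words,
               LLT_ALIGN(SIZEOF_TAG + sizeof(void*) * words,
                         JL_SMALL_BYTE_ALIGNMENT));
    // prints 16, 32, 32: the 2- and 3-word objects share a size class
    return 0;
}
```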
@@ -2509,7 +2509,7 @@ jl_thread_heap_t *jl_mk_thread_heap(void)
 #ifdef JULIA_ENABLE_THREADING
     // Cache-aligned malloc
     jl_thread_heap =
-        (jl_thread_heap_t*)jl_malloc_aligned(sizeof(jl_thread_heap_t), 64);
+        (jl_thread_heap_t*)jl_malloc_aligned(sizeof(jl_thread_heap_t), JL_CACHE_BYTE_ALIGNMENT);
 #endif
     FOR_CURRENT_HEAP () {
         const int *szc = sizeclasses;
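Allocating each thread's heap on its own cache line prevents false sharing: two threads updating their own counters would otherwise ping-pong a shared line between cores. Where the object is statically sized, C11 can state the same intent directly; a sketch of the idea, not Julia's mechanism:

```c
#include <stdalign.h>
#include <stdio.h>

// Each entry starts on its own 64-byte line, so one thread's writes
// never invalidate the line holding the other thread's state.
struct per_thread { alignas(64) long allocd_bytes; };

int main(void)
{
    static struct per_thread heaps[2];
    printf("stride = %zu bytes\n",
           (size_t)((char*)&heaps[1] - (char*)&heaps[0])); // 64
    return 0;
}
```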
@@ -2673,6 +2673,7 @@ static void big_obj_stats(void)

 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
+    sz += JL_SMALL_BYTE_ALIGNMENT;
     maybe_collect();
     allocd_bytes += sz;
     gc_num.malloc++;
@@ -2684,6 +2685,7 @@ JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)

 JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 {
+    nm += JL_SMALL_BYTE_ALIGNMENT;
     maybe_collect();
     allocd_bytes += nm * sz;
     gc_num.malloc++;
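Note that here the padding is added to the element count `nm`, not the element size, so it reserves exactly `JL_SMALL_BYTE_ALIGNMENT` extra bytes only when `sz == 1`, which is how `jl_calloc` below invokes it. The arithmetic for that call pattern:

```c
#include <stdio.h>

#define JL_SMALL_BYTE_ALIGNMENT 16   // assumed value

int main(void)
{
    size_t nm = 100, sz = 1;                       // as jl_calloc calls it
    size_t total = (nm + JL_SMALL_BYTE_ALIGNMENT) * sz;
    printf("extra bytes: %zu\n", total - nm * sz); // 16 when sz == 1
    return 0;
}
```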
@@ -2696,15 +2698,15 @@ JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 JL_DLLEXPORT void jl_gc_counted_free(void *p, size_t sz)
 {
     free(p);
-    freed_bytes += sz;
+    freed_bytes += sz + JL_SMALL_BYTE_ALIGNMENT;
     gc_num.freecall++;
 }

-JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old,
-                                                       size_t sz)
+JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
 {
+    old += JL_SMALL_BYTE_ALIGNMENT;
+    sz += JL_SMALL_BYTE_ALIGNMENT;
     maybe_collect();
-
     if (sz < old)
         freed_bytes += (old - sz);
     else
@@ -2718,7 +2720,7 @@ JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old,

 JL_DLLEXPORT void *jl_malloc(size_t sz)
 {
-    int64_t *p = (int64_t*)jl_gc_counted_malloc(sz + 16);
+    int64_t *p = (int64_t*)jl_gc_counted_malloc(sz);
     p[0] = sz;
     return (void*)(p + 2);
 }
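`jl_malloc` stores the requested size in the first of two `int64_t` header slots and returns the address 16 bytes in; after this change the header bytes are charged once inside the counted wrappers (the `sz += JL_SMALL_BYTE_ALIGNMENT` above) rather than at every call site. The prefix idiom in isolation, independent of Julia's wrappers:

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

// Prefix each block with a 16-byte header holding the payload size.
static void *sized_malloc(size_t sz)
{
    int64_t *p = malloc(sz + 16);
    if (!p) return NULL;
    p[0] = (int64_t)sz;
    return (void*)(p + 2);     // hand out the address past the header
}

static void sized_free(void *q)
{
    int64_t *pp = (int64_t*)q - 2;           // rewind to the header
    printf("freeing %lld payload bytes\n", (long long)pp[0]);
    free(pp);
}

int main(void)
{
    void *q = sized_malloc(100);
    sized_free(q);             // prints "freeing 100 payload bytes"
    return 0;
}
```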
@@ -2727,7 +2729,7 @@ JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
 {
     int64_t *p;
     size_t nmsz = nm * sz;
-    p = (int64_t*)jl_gc_counted_calloc(nmsz + 16, 1);
+    p = (int64_t*)jl_gc_counted_calloc(nmsz, 1);
     p[0] = nmsz;
     return (void*)(p + 2);
 }
@@ -2736,7 +2738,7 @@ JL_DLLEXPORT void jl_free(void *p)
 {
     int64_t *pp = (int64_t*)p - 2;
     size_t sz = pp[0];
-    jl_gc_counted_free(pp, sz + 16);
+    jl_gc_counted_free(pp, sz);
 }

 JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
@@ -2751,20 +2753,20 @@ JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
         pp = (int64_t*)p - 2;
         szold = pp[0];
     }
-    int64_t *pnew = (int64_t*)jl_gc_counted_realloc_with_old_size(pp, szold + 16, sz + 16);
+    int64_t *pnew = (int64_t*)jl_gc_counted_realloc_with_old_size(pp, szold, sz);
     pnew[0] = sz;
     return (void*)(pnew + 2);
 }

 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
 {
     maybe_collect();
-    size_t allocsz = LLT_ALIGN(sz, 16);
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
     if (allocsz < sz) // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
     allocd_bytes += allocsz;
     gc_num.malloc++;
-    void *b = malloc_a16(allocsz);
+    void *b = malloc_cache_align(allocsz);
     if (b == NULL)
         jl_throw(jl_memory_exception);
     return b;
@@ -2775,7 +2777,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
 {
     maybe_collect();

-    size_t allocsz = LLT_ALIGN(sz, 16);
+    size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
     if (allocsz < sz) // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
@@ -2791,7 +2793,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,

     void *b;
     if (isaligned)
-        b = realloc_a16(d, allocsz, oldsz);
+        b = realloc_cache_align(d, allocsz, oldsz);
     else
         b = realloc(d, allocsz);
     if (b == NULL)