Skip to content

Commit dc8f646

Browse files
raeburn authored and Mikulas Patocka committed
dm vdo: rework processing of loaded refcount byte arrays
Clear provisional refcount values and count free/allocated blocks in one integrated loop. Process 8 aligned bytes at a time instead of every byte individually. On an Intel i7-11850H this reduces the CPU time needed to process a loaded refcount block by a factor of about 5-6. On a large system the refcount loading may be the largest factor in device startup time. Signed-off-by: Ken Raeburn <[email protected]> Signed-off-by: Matthew Sakai <[email protected]> Signed-off-by: Mikulas Patocka <[email protected]>
1 parent ff3f711 commit dc8f646

File tree

1 file changed

+83
-22
lines changed

1 file changed

+83
-22
lines changed

drivers/md/dm-vdo/slab-depot.c

Lines changed: 83 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2164,28 +2164,95 @@ static void dirty_all_reference_blocks(struct vdo_slab *slab)
21642164
dirty_block(&slab->reference_blocks[i]);
21652165
}
21662166

2167+
static inline bool journal_points_equal(struct journal_point first,
2168+
struct journal_point second)
2169+
{
2170+
return ((first.sequence_number == second.sequence_number) &&
2171+
(first.entry_count == second.entry_count));
2172+
}
2173+
21672174
/**
2168-
* clear_provisional_references() - Clear the provisional reference counts from a reference block.
2169-
* @block: The block to clear.
2175+
* match_bytes() - Check an 8-byte word for bytes matching the value specified
2176+
* @input: A word to examine the bytes of
2177+
* @match: The byte value sought
2178+
*
2179+
* Return: 1 in each byte when the corresponding input byte matched, 0 otherwise
21702180
*/
2171-
static void clear_provisional_references(struct reference_block *block)
2181+
static inline u64 match_bytes(u64 input, u8 match)
21722182
{
2173-
vdo_refcount_t *counters = get_reference_counters_for_block(block);
2174-
block_count_t j;
2183+
u64 temp = input ^ (match * 0x0101010101010101ULL);
2184+
/* top bit of each byte is set iff top bit of temp byte is clear; rest are 0 */
2185+
u64 test_top_bits = ~temp & 0x8080808080808080ULL;
2186+
/* top bit of each byte is set iff low 7 bits of temp byte are clear; rest are useless */
2187+
u64 test_low_bits = 0x8080808080808080ULL - (temp & 0x7f7f7f7f7f7f7f7fULL);
2188+
/* return 1 when both tests indicate temp byte is 0 */
2189+
return (test_top_bits & test_low_bits) >> 7;
2190+
}
2191+
2192+
/**
2193+
* count_valid_references() - Process a newly loaded refcount array
2194+
* @counters: the array of counters from a metadata block
2195+
*
2196+
* Scan a 8-byte-aligned array of counters, fixing up any "provisional" values that weren't
2197+
* cleaned up at shutdown, changing them internally to "empty".
2198+
*
2199+
* Return: the number of blocks that are referenced (counters not "empty")
2200+
*/
2201+
static unsigned int count_valid_references(vdo_refcount_t *counters)
2202+
{
2203+
u64 *words = (u64 *)counters;
2204+
/* It's easier to count occurrences of a specific byte than its absences. */
2205+
unsigned int empty_count = 0;
2206+
/* For speed, we process 8 bytes at once. */
2207+
unsigned int words_left = COUNTS_PER_BLOCK / sizeof(u64);
2208+
2209+
/*
2210+
* Sanity check assumptions used for optimizing this code: Counters are bytes. The counter
2211+
* array is a multiple of the word size.
2212+
*/
2213+
BUILD_BUG_ON(sizeof(vdo_refcount_t) != 1);
2214+
BUILD_BUG_ON((COUNTS_PER_BLOCK % sizeof(u64)) != 0);
21752215

2176-
for (j = 0; j < COUNTS_PER_BLOCK; j++) {
2177-
if (counters[j] == PROVISIONAL_REFERENCE_COUNT) {
2178-
counters[j] = EMPTY_REFERENCE_COUNT;
2179-
block->allocated_count--;
2216+
while (words_left > 0) {
2217+
/*
2218+
* This is used effectively as 8 byte-size counters. Byte 0 counts how many words
2219+
* had the target value found in byte 0, etc. We just have to avoid overflow.
2220+
*/
2221+
u64 split_count = 0;
2222+
/*
2223+
* The counter "% 255" trick used below to fold split_count into empty_count
2224+
* imposes a limit of 254 bytes examined each iteration of the outer loop. We
2225+
* process a word at a time, so that limit gets rounded down to 31 u64 words.
2226+
*/
2227+
const unsigned int max_words_per_iteration = 254 / sizeof(u64);
2228+
unsigned int iter_words_left = min_t(unsigned int, words_left,
2229+
max_words_per_iteration);
2230+
2231+
words_left -= iter_words_left;
2232+
2233+
while (iter_words_left--) {
2234+
u64 word = *words;
2235+
u64 temp;
2236+
2237+
/* First, if we have any provisional refcount values, clear them. */
2238+
temp = match_bytes(word, PROVISIONAL_REFERENCE_COUNT);
2239+
if (temp) {
2240+
/*
2241+
* 'temp' has 0x01 bytes where 'word' has PROVISIONAL; this xor
2242+
* will alter just those bytes, changing PROVISIONAL to EMPTY.
2243+
*/
2244+
word ^= temp * (PROVISIONAL_REFERENCE_COUNT ^ EMPTY_REFERENCE_COUNT);
2245+
*words = word;
2246+
}
2247+
2248+
/* Now count the EMPTY_REFERENCE_COUNT bytes, updating the 8 counters. */
2249+
split_count += match_bytes(word, EMPTY_REFERENCE_COUNT);
2250+
words++;
21802251
}
2252+
empty_count += split_count % 255;
21812253
}
2182-
}
21832254

2184-
static inline bool journal_points_equal(struct journal_point first,
2185-
struct journal_point second)
2186-
{
2187-
return ((first.sequence_number == second.sequence_number) &&
2188-
(first.entry_count == second.entry_count));
2255+
return COUNTS_PER_BLOCK - empty_count;
21892256
}
21902257

21912258
/**
@@ -2196,7 +2263,6 @@ static inline bool journal_points_equal(struct journal_point first,
21962263
static void unpack_reference_block(struct packed_reference_block *packed,
21972264
struct reference_block *block)
21982265
{
2199-
block_count_t index;
22002266
sector_count_t i;
22012267
struct vdo_slab *slab = block->slab;
22022268
vdo_refcount_t *counters = get_reference_counters_for_block(block);
@@ -2222,11 +2288,7 @@ static void unpack_reference_block(struct packed_reference_block *packed,
22222288
}
22232289
}
22242290

2225-
block->allocated_count = 0;
2226-
for (index = 0; index < COUNTS_PER_BLOCK; index++) {
2227-
if (counters[index] != EMPTY_REFERENCE_COUNT)
2228-
block->allocated_count++;
2229-
}
2291+
block->allocated_count = count_valid_references(counters);
22302292
}
22312293

22322294
/**
@@ -2247,7 +2309,6 @@ static void finish_reference_block_load(struct vdo_completion *completion)
22472309
struct packed_reference_block *packed = (struct packed_reference_block *) data;
22482310

22492311
unpack_reference_block(packed, block);
2250-
clear_provisional_references(block);
22512312
slab->free_blocks -= block->allocated_count;
22522313
}
22532314
return_vio_to_pool(pooled);

0 commit comments

Comments
 (0)