From 201a04470f20b8f5c87762ce6d6a809f05236a38 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Wed, 17 Apr 2024 17:14:48 +0800 Subject: [PATCH 01/30] Add new example for chapter 5 A new directory /examples is added for example code. .clang-format is copied from the sysprog21/lkmpg project. This commit provides a simplified implementation of a thread pool. After initializing the thread pool with a thread count, jobs can be added. The job queue is an SPMC ring buffer. To keep the implementation minimal, the producer is not protected, so the thread pool cannot start running automatically when jobs are added; otherwise, a worker might try to claim a job before it is fully enqueued. Padding is added in thread_pool_t to avoid false sharing. The number "40" is the sum of the sizes of the struct members, including alignment, before the first padding. There should be a better way to determine the value since structure packing is implementation-defined. The test in the main function results in a nondeterministic order in which jobs echo their IDs. A mechanism to wait for all jobs to complete should be added later instead of using sleep. `thread_pool_destroy()` is not functional yet. 
--- examples/.clang-format | 98 ++++++++++++++++++++++++++ examples/ch5_example.c | 151 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+) create mode 100644 examples/.clang-format create mode 100644 examples/ch5_example.c diff --git a/examples/.clang-format b/examples/.clang-format new file mode 100644 index 0000000..e1e184f --- /dev/null +++ b/examples/.clang-format @@ -0,0 +1,98 @@ +Language: Cpp + +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignOperands: true +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: false +BinPackArguments: true +BinPackParameters: true + +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true + +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Custom +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: false +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeComma +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: false +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: false +DerivePointerAlignment: false 
+DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: false + +ForEachMacros: + - 'list_for_each' + - 'list_for_each_safe' + +IncludeBlocks: Preserve +IncludeCategories: + - Regex: '.*' + Priority: 1 +IncludeIsMainRegex: '(Test)?$' +IndentCaseLabels: false +IndentPPDirectives: None +IndentWidth: 4 +IndentWrappedFunctionNames: false +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None + +PointerAlignment: Right +ReflowComments: false +SortIncludes: false +SortUsingDeclarations: false +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInContainerLiterals: false +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Cpp03 +TabWidth: 4 +UseTab: Never \ No newline at end of file diff --git a/examples/ch5_example.c b/examples/ch5_example.c new file mode 100644 index 0000000..d5fdaca --- /dev/null +++ b/examples/ch5_example.c @@ -0,0 +1,151 @@ +#include +#include +#include +#include +#include +#include +#include + +#define CACHE_LINE_SIZE 64 + +typedef struct job { + void *args; + struct job *next; + struct job *prev; +} job_t; + +enum state { idle, running, cancelled }; + +typedef struct thread_pool { + atomic_flag initialezed; + int size; + thrd_t *pool; + atomic_int state; + thrd_start_t func; + // job queue is a SPMC ring buffer + job_t *head; + char padding1[CACHE_LINE_SIZE - 40]; + atomic_uintptr_t tail; // pointer to head->prev + char padding2[CACHE_LINE_SIZE - sizeof(atomic_uintptr_t)]; +} thread_pool_t; + +int worker(void *args) +{ + if (!args) + return EXIT_FAILURE; + thread_pool_t *thrd_pool = (thread_pool_t 
*)args; + + while (1) { + uintptr_t job; + if (atomic_load(&thrd_pool->state) == running) { + // claim the job + job = atomic_load(&thrd_pool->tail); + while (!atomic_compare_exchange_strong( + &thrd_pool->tail, &job, (uintptr_t)(&(*(job_t **)job)->prev))) { + } + } else if (atomic_load(&thrd_pool->state) == cancelled) { + return EXIT_SUCCESS; + } else { + continue; + } + + if ((*(job_t **)job)->args == NULL) { + // store happens-before while loop + atomic_store(&thrd_pool->state, idle); + while (1) { + if (atomic_load(&thrd_pool->state) == running) + break; + // To auto run when jobs added, check if head and tail are different + // as long as producer is protected + } + } else { + printf("Hello from job %d\n", *(int *)(*(job_t **)job)->args); + free((*(job_t **)job)->args); + free(*(job_t **)job); + } + }; +} + +bool thread_pool_init(thread_pool_t *thrd_pool, int size) +{ + atomic_flag_test_and_set(&thrd_pool->initialezed); // It's useless anyway + + // TODO: size should be a positive integer + // malloc with zero size is non-portable + thrd_pool->pool = malloc(sizeof(thrd_t) * size); + if (!thrd_pool->pool) { + printf("Failed to allocate thread identifiers.\n"); + return false; + } + + // May use memory pool for jobs + job_t *idle_job = malloc(sizeof(job_t)); + if (!idle_job) { + printf("Failed to allocate idle job.\n"); + return false; + } + // idle_job will always be the first job + idle_job->args = NULL; + idle_job->next = idle_job; + idle_job->prev = idle_job; + thrd_pool->func = worker; + thrd_pool->head = idle_job; + thrd_pool->tail = + (atomic_uintptr_t)(&thrd_pool->head->prev); // init is not multihtreaded + thrd_pool->state = idle; + thrd_pool->size = size; + + for (int i = 0; i < size; i++) { + thrd_create(thrd_pool->pool + i, worker, thrd_pool); + //TODO: error handling + } + + return true; +} + +void thread_pool_destroy(thread_pool_t *thrd_pool) +{ + atomic_store(&thrd_pool->state, cancelled); + free(thrd_pool->pool); + free(thrd_pool); + + thrd_pool 
= NULL; +} + +__attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) +{ + // May use memory pool for jobs + job_t *job = malloc(sizeof(job_t)); + if (!job) + return false; + + // unprotected producer + job->args = args; + job->next = thrd_pool->head->next; + job->prev = thrd_pool->head; + thrd_pool->head->next->prev = job; + thrd_pool->head->next = job; + + return true; +} + +int main() +{ + thread_pool_t thrd_pool; + int thread_count = 8; + int job_count = 16; + if (!thread_pool_init(&thrd_pool, thread_count)) { + printf("failed to init.\n"); + return 0; + } + for (int i = 0; i < job_count; i++) { + int *id = malloc(sizeof(int)); + *id = i; + add_job(&thrd_pool, id); + } + // Due to simplified job queue (not protecting producer), starting the pool manually + atomic_fetch_add(&thrd_pool.state, 1); + sleep(2); + atomic_fetch_add(&thrd_pool.state, 1); + return 0; +} From 6abc1ea28d4a7bc747bc1fe047a9c6eceb4d789b Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Wed, 17 Apr 2024 20:53:28 +0800 Subject: [PATCH 02/30] Refactor worker thread conditional code path Codes executed under running condition are placed in the same scope. 
--- examples/ch5_example.c | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index d5fdaca..3d86d88 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -36,32 +36,30 @@ int worker(void *args) thread_pool_t *thrd_pool = (thread_pool_t *)args; while (1) { - uintptr_t job; - if (atomic_load(&thrd_pool->state) == running) { + if (atomic_load(&thrd_pool->state) == cancelled) { + return EXIT_SUCCESS; + } else if (atomic_load(&thrd_pool->state) == running) { // claim the job - job = atomic_load(&thrd_pool->tail); + uintptr_t job = atomic_load(&thrd_pool->tail); while (!atomic_compare_exchange_strong( &thrd_pool->tail, &job, (uintptr_t)(&(*(job_t **)job)->prev))) { } - } else if (atomic_load(&thrd_pool->state) == cancelled) { - return EXIT_SUCCESS; - } else { - continue; - } - - if ((*(job_t **)job)->args == NULL) { - // store happens-before while loop - atomic_store(&thrd_pool->state, idle); - while (1) { - if (atomic_load(&thrd_pool->state) == running) - break; - // To auto run when jobs added, check if head and tail are different - // as long as producer is protected + if ((*(job_t **)job)->args == NULL) { + // store happens-before while loop + atomic_store(&thrd_pool->state, idle); + while (1) { + if (atomic_load(&thrd_pool->state) == running) + break; + // To auto run when jobs added, check if head and tail are different + // as long as producer is protected + } + } else { + printf("Hello from job %d\n", *(int *)(*(job_t **)job)->args); + free((*(job_t **)job)->args); + free(*(job_t **)job); } } else { - printf("Hello from job %d\n", *(int *)(*(job_t **)job)->args); - free((*(job_t **)job)->args); - free(*(job_t **)job); + continue; } }; } From 69494757de90f99243099594b8e6d057e6b8fc92 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Wed, 17 Apr 2024 20:58:16 +0800 Subject: [PATCH 03/30] Combine identifier declarations in one line Both next and 
prev in job_t are struct job, thus residing them in the same line. --- examples/ch5_example.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index 3d86d88..8293e0c 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -10,8 +10,7 @@ typedef struct job { void *args; - struct job *next; - struct job *prev; + struct job *next, *prev; } job_t; enum state { idle, running, cancelled }; From b1b3431f2c73964855c6f8b01f3a9fbb7ab73279 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Wed, 17 Apr 2024 23:08:13 +0800 Subject: [PATCH 04/30] Add assert to avoid the case size equals zero An assert is added before malloc. Also type of size to changed to size_t. --- examples/ch5_example.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index 8293e0c..969b437 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -5,6 +5,7 @@ #include #include #include +#include #define CACHE_LINE_SIZE 64 @@ -63,12 +64,11 @@ int worker(void *args) }; } -bool thread_pool_init(thread_pool_t *thrd_pool, int size) +bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) { atomic_flag_test_and_set(&thrd_pool->initialezed); // It's useless anyway - // TODO: size should be a positive integer - // malloc with zero size is non-portable + assert(size > 0); thrd_pool->pool = malloc(sizeof(thrd_t) * size); if (!thrd_pool->pool) { printf("Failed to allocate thread identifiers.\n"); From fabf7423002a2fe949541cb05071db93791399b4 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Wed, 17 Apr 2024 23:13:54 +0800 Subject: [PATCH 05/30] Shorten else if as an if --- examples/ch5_example.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index 969b437..74b0681 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -36,9 +36,9 @@ int worker(void *args) thread_pool_t 
*thrd_pool = (thread_pool_t *)args; while (1) { - if (atomic_load(&thrd_pool->state) == cancelled) { + if (atomic_load(&thrd_pool->state) == cancelled) return EXIT_SUCCESS; - } else if (atomic_load(&thrd_pool->state) == running) { + if (atomic_load(&thrd_pool->state) == running) { // claim the job uintptr_t job = atomic_load(&thrd_pool->tail); while (!atomic_compare_exchange_strong( From aefdd1628842f1f85aa4fdd1c0bcf705983a2fb6 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Fri, 19 Apr 2024 03:31:29 +0800 Subject: [PATCH 06/30] Replace long expressions with macros Two macros `CAST_JOB(job, type)` and `PREV_JOB(job)` are added to simplify long expressoins to improve readability. --- examples/ch5_example.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index 74b0681..bf11f50 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -8,6 +8,8 @@ #include #define CACHE_LINE_SIZE 64 +#define CAST_JOB(job, type) ({ *(type *)(*(job_t **)(job))->args; }) +#define PREV_JOB(job) ({ (uintptr_t)(&(*(job_t **)(job))->prev); }) typedef struct job { void *args; @@ -41,8 +43,8 @@ int worker(void *args) if (atomic_load(&thrd_pool->state) == running) { // claim the job uintptr_t job = atomic_load(&thrd_pool->tail); - while (!atomic_compare_exchange_strong( - &thrd_pool->tail, &job, (uintptr_t)(&(*(job_t **)job)->prev))) { + while (!atomic_compare_exchange_strong(&thrd_pool->tail, &job, + PREV_JOB(job))) { } if ((*(job_t **)job)->args == NULL) { // store happens-before while loop @@ -54,7 +56,7 @@ int worker(void *args) // as long as producer is protected } } else { - printf("Hello from job %d\n", *(int *)(*(job_t **)job)->args); + printf("Hello from job %d\n", CAST_JOB(job, int)); free((*(job_t **)job)->args); free(*(job_t **)job); } From 47702ca4df9ba61d2ab462d16d74bbc9bb510ca0 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Sun, 21 Apr 2024 20:31:57 +0800 Subject: [PATCH 07/30] Fix wrong 
behavior of job queue A new struct `idle_job` is added. There are several ways to have an atomic `thrd_pool->head->prev` (the original tail): (1) _Atomic used as either specifier or qualifier in C11 acts on an object, not a region. So we cannot have only the idle `job_t` have an atomic `prev`. All job_t would have an atomic `prev`. However, _Atomic is only allowed to act on a complete type, meaning that `_Atomic(struct job *)` and `_Atomic(void *)` are not allowed in the declaration of `job_t`. `atomic_uintptr_t` has already shown enough casting chaos in the previous commit. (2) Embed `job_t` in a new struct `idle_job` alongside `_Atomic(job_t *) prev;`. In the worker function, the last job is accessed through `thrd_pool->head->prev`, the same as (1). The only difference is how the idle job is initialized and how the first job is added. Padding could also be added around `prev` to avoid false sharing. The test in main demonstrates that a series of jobs is added after finishing the existing ones. `thread_pool_destroy` is implemented to cancel and free the pool. An additional sleep could be added before destroy to observe the second series of jobs. Notice that freeing the memory of a job in the worker directly after using it may cause dangling pointers in other threads. Safe memory reclamation should be introduced to avoid this completely. Alternatively, use a memory pool for jobs. 
--- examples/ch5_example.c | 87 ++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 37 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index bf11f50..da3b879 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -8,14 +7,18 @@ #include #define CACHE_LINE_SIZE 64 -#define CAST_JOB(job, type) ({ *(type *)(*(job_t **)(job))->args; }) -#define PREV_JOB(job) ({ (uintptr_t)(&(*(job_t **)(job))->prev); }) typedef struct job { void *args; struct job *next, *prev; } job_t; +typedef struct idle_job { + _Atomic(job_t *) prev; + char padding[CACHE_LINE_SIZE - sizeof(_Atomic(job_t *))]; + job_t job; +} idle_job_t; + enum state { idle, running, cancelled }; typedef struct thread_pool { @@ -25,10 +28,7 @@ typedef struct thread_pool { atomic_int state; thrd_start_t func; // job queue is a SPMC ring buffer - job_t *head; - char padding1[CACHE_LINE_SIZE - 40]; - atomic_uintptr_t tail; // pointer to head->prev - char padding2[CACHE_LINE_SIZE - sizeof(atomic_uintptr_t)]; + idle_job_t *head; } thread_pool_t; int worker(void *args) @@ -42,25 +42,22 @@ int worker(void *args) return EXIT_SUCCESS; if (atomic_load(&thrd_pool->state) == running) { // claim the job - uintptr_t job = atomic_load(&thrd_pool->tail); - while (!atomic_compare_exchange_strong(&thrd_pool->tail, &job, - PREV_JOB(job))) { + job_t *job = atomic_load(&thrd_pool->head->prev); + while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, + job->prev)) { } - if ((*(job_t **)job)->args == NULL) { - // store happens-before while loop + if (job->args == NULL) { atomic_store(&thrd_pool->state, idle); - while (1) { - if (atomic_load(&thrd_pool->state) == running) - break; - // To auto run when jobs added, check if head and tail are different - // as long as producer is protected - } } else { - printf("Hello from job %d\n", CAST_JOB(job, int)); - free((*(job_t **)job)->args); - free(*(job_t 
**)job); + printf("Hello from job %d\n", *(int *)job->args); + // this could free too early + free(job->args); + free(job); } } else { + /* To auto run when jobs added, set status to running if job queue is not empty. + * As long as the producer is protected*/ + thrd_yield(); continue; } }; @@ -78,19 +75,18 @@ bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) } // May use memory pool for jobs - job_t *idle_job = malloc(sizeof(job_t)); + idle_job_t *idle_job = malloc(sizeof(idle_job_t)); if (!idle_job) { printf("Failed to allocate idle job.\n"); return false; } // idle_job will always be the first job - idle_job->args = NULL; - idle_job->next = idle_job; - idle_job->prev = idle_job; + idle_job->job.args = NULL; + idle_job->job.next = &idle_job->job; + idle_job->job.prev = &idle_job->job; + idle_job->prev = &idle_job->job; thrd_pool->func = worker; thrd_pool->head = idle_job; - thrd_pool->tail = - (atomic_uintptr_t)(&thrd_pool->head->prev); // init is not multihtreaded thrd_pool->state = idle; thrd_pool->size = size; @@ -105,10 +101,16 @@ bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) void thread_pool_destroy(thread_pool_t *thrd_pool) { atomic_store(&thrd_pool->state, cancelled); + for (int i = 0; i < thrd_pool->size; i++) { + thrd_join(thrd_pool->pool[i], NULL); + } + while (thrd_pool->head->prev != &thrd_pool->head->job) { + job_t *job = thrd_pool->head->prev->prev; + free(thrd_pool->head->prev); + thrd_pool->head->prev = job; + } + free(thrd_pool->head); free(thrd_pool->pool); - free(thrd_pool); - - thrd_pool = NULL; } __attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) @@ -120,10 +122,15 @@ __attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) // unprotected producer job->args = args; - job->next = thrd_pool->head->next; - job->prev = thrd_pool->head; - thrd_pool->head->next->prev = job; - thrd_pool->head->next = job; + job->next = thrd_pool->head->job.next; + job->prev = 
&thrd_pool->head->job; + thrd_pool->head->job.next->prev = job; + thrd_pool->head->job.next = job; + if (thrd_pool->head->prev == &thrd_pool->head->job) { + thrd_pool->head->prev = job; + // trap worker at idle job + thrd_pool->head->job.prev = &thrd_pool->head->job; + } return true; } @@ -143,8 +150,14 @@ int main() add_job(&thrd_pool, id); } // Due to simplified job queue (not protecting producer), starting the pool manually - atomic_fetch_add(&thrd_pool.state, 1); - sleep(2); - atomic_fetch_add(&thrd_pool.state, 1); + atomic_store(&thrd_pool.state, running); + sleep(1); + for (int i = 0; i < job_count; i++) { + int *id = malloc(sizeof(int)); + *id = i; + add_job(&thrd_pool, id); + } + atomic_store(&thrd_pool.state, running); + thread_pool_destroy(&thrd_pool); return 0; } From 347b752391619f1ef2394fec75fe8e1ef895808f Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Mon, 22 Apr 2024 01:55:30 +0800 Subject: [PATCH 08/30] Add use case of atomic exhange, TAS, fetch AND Atomic flag is used for checking if the given thread pool has been initialized. The flag is initialized when thread pool struct declared and reseted to false when thread pool destroyed. Atomic exchange obtains previous state when destroying thread pool and gives warnning message if the state were running. Atomic fetch and AND with zero demonstrate a way to set state to idle. --- examples/ch5_example.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index da3b879..57e087d 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -50,13 +50,13 @@ int worker(void *args) atomic_store(&thrd_pool->state, idle); } else { printf("Hello from job %d\n", *(int *)job->args); - // this could free too early + // could cause dangling pointer in other threads free(job->args); free(job); } } else { /* To auto run when jobs added, set status to running if job queue is not empty. 
- * As long as the producer is protected*/ + * As long as the producer is protected */ thrd_yield(); continue; } @@ -65,7 +65,10 @@ int worker(void *args) bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) { - atomic_flag_test_and_set(&thrd_pool->initialezed); // It's useless anyway + if (atomic_flag_test_and_set(&thrd_pool->initialezed)) { + printf("This thread pool has already been initialized.\n"); + return false; + } assert(size > 0); thrd_pool->pool = malloc(sizeof(thrd_t) * size); @@ -100,7 +103,8 @@ bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) void thread_pool_destroy(thread_pool_t *thrd_pool) { - atomic_store(&thrd_pool->state, cancelled); + if(atomic_exchange(&thrd_pool->state, cancelled)) + printf("Thread pool cancelled with jobs still running.\n"); for (int i = 0; i < thrd_pool->size; i++) { thrd_join(thrd_pool->pool[i], NULL); } @@ -111,6 +115,8 @@ void thread_pool_destroy(thread_pool_t *thrd_pool) } free(thrd_pool->head); free(thrd_pool->pool); + atomic_fetch_and(&thrd_pool->state, 0); + atomic_flag_clear(&thrd_pool->initialezed); } __attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) @@ -137,7 +143,7 @@ __attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) int main() { - thread_pool_t thrd_pool; + thread_pool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; int thread_count = 8; int job_count = 16; if (!thread_pool_init(&thrd_pool, thread_count)) { From 928c1eca3dc79d5a875345a4e8f418c5d19874af Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Tue, 23 Apr 2024 16:36:54 +0800 Subject: [PATCH 09/30] Define job count and thread count as constant Both job_count and thread_count were meant to be constant in the given test scenario. Thus they were specified as macros instead of variables. 
--- examples/ch5_example.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/examples/ch5_example.c b/examples/ch5_example.c index 57e087d..61f6c66 100644 --- a/examples/ch5_example.c +++ b/examples/ch5_example.c @@ -7,6 +7,8 @@ #include #define CACHE_LINE_SIZE 64 +#define N_JOBS 16 +#define N_THREADS 8 typedef struct job { void *args; @@ -50,9 +52,8 @@ int worker(void *args) atomic_store(&thrd_pool->state, idle); } else { printf("Hello from job %d\n", *(int *)job->args); - // could cause dangling pointer in other threads free(job->args); - free(job); + free(job); // could cause dangling pointer in other threads } } else { /* To auto run when jobs added, set status to running if job queue is not empty. @@ -103,7 +104,7 @@ bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) void thread_pool_destroy(thread_pool_t *thrd_pool) { - if(atomic_exchange(&thrd_pool->state, cancelled)) + if (atomic_exchange(&thrd_pool->state, cancelled)) printf("Thread pool cancelled with jobs still running.\n"); for (int i = 0; i < thrd_pool->size; i++) { thrd_join(thrd_pool->pool[i], NULL); @@ -144,13 +145,11 @@ __attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) int main() { thread_pool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; - int thread_count = 8; - int job_count = 16; - if (!thread_pool_init(&thrd_pool, thread_count)) { + if (!thread_pool_init(&thrd_pool, N_THREADS)) { printf("failed to init.\n"); return 0; } - for (int i = 0; i < job_count; i++) { + for (int i = 0; i < N_JOBS; i++) { int *id = malloc(sizeof(int)); *id = i; add_job(&thrd_pool, id); @@ -158,7 +157,7 @@ int main() // Due to simplified job queue (not protecting producer), starting the pool manually atomic_store(&thrd_pool.state, running); sleep(1); - for (int i = 0; i < job_count; i++) { + for (int i = 0; i < N_JOBS; i++) { int *id = malloc(sizeof(int)); *id = i; add_job(&thrd_pool, id); From 4c9a9084b5ee0314a5be011b29f0df8e51066ea3 Mon Sep 17 
00:00:00 2001 From: Chih-Wei Chien Date: Fri, 3 May 2024 18:02:33 +0800 Subject: [PATCH 10/30] Revise section 5 with new example Clarification on read-modify-write is first added at the beginning of the section. To make the discussion based on atomic load/store, more information is supplemented at the end of section 2. Example code is included by using minted package. Each subsection is revised according to the atomic library usage in the example. At the end of sectoin 5, a new subsection "further improvements" is added to discuss topics on leveraging other memory order, false sharing and safe memory reclamation. First two topics are forward referenced to according chapters. The last one is not covered in this book so it has no reference. --- concurrency-primer.tex | 176 +++++++++++++++++++++++------------------ examples/rmw_example.c | 168 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+), 76 deletions(-) create mode 100644 examples/rmw_example.c diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 2d096bb..79c30f4 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -321,6 +321,9 @@ \section{Enforcing law and order} This model, defined by Leslie Lamport in 1979, is called \introduce{sequential consistency}. +Notice that using atomic variables as an lvalue expression, such as \monobox{v\_ready = true} and \monobox{while(!v\_ready)}, is a convenient alternative to explicitly using \monobox{atomic\_load} or \monobox{atomic\_store}. +Lvalue-to-rvalue conversion (which models a memory read from an atomic location to a CPU register) strips atomicity along with other qualifiers. + \section{Atomicity} \label{atomicity} But order is only one of the vital ingredients for inter-thread communication. 
@@ -379,56 +382,79 @@ \section{Arbitrarily-sized ``atomic'' types} \section{Read-modify-write} \label{rmw} - -Loads and stores are all well and good, -but sometimes we need to read a value, modify it, +% To add: +% 值得注意的是 atomic type 不僅是 RWM 的保證,也是前後執行順序的保證,那可以把他當作 memery barrier 嗎? +So far we have introduced the importance of order and atomicity. +The latter ensures that no other operations on the target object success until the executing operation has finished. +This also establishes ordering between operations, as no operations can occur concurrently. +For two operations, A and B, either A happens before B or B happens before A. +As in \secref{seqcst}, a local order of other operations associated to the an atomic object is given as well, with \introduce{sequential consistency} as default consistency level. +Since happens before relation is transitive, just like $>$ and $<$, a global order is established by combining local order and inter-thread order provided by atomic objects. + +Atomic loads and stores are all well and good when we don't need to consider the previous state of atomic variables. +But sometimes we need to read a value, modify it, and write it back as a single atomic step. +That is, the modification is based on the previous state that is visible for reading, and the result is then written back. +A complete \introduce{read-modify-write} operation is performed atomically to ensure visibility to subsequent operations. + There are a few common \introduce{read-modify-write} (\textsc{RMW}) operations. In \cplusplus{}, they are represented as member functions of \cpp|std::atomic|. In \clang{}, they are freestanding functions. +Following example code is a simplify implementation of thread pool to demonstrate the use of \clang{}11 atomic library. + +\inputminted{c}{./examples/rmw_example.c} + +Compile the code with \monobox{gcc rmw\_example.c -o rmw\_example -Wall -Wextra -std=c11 -pthread} and execute the program. 
+A thread pool has three states: idle, cancelled and running. +It is initialized with \monobox{N\_THREADS} (default 8) of threads. +\monobox{N\_JOBS} (default 16) of jobs are added, and the pool is then set to running. +A job is simply echoing its job ID. +\monobox{sleep(1)} is used to ensure that the second batch of jobs is added after the first batch is finished; otherwise, jobs may not be consumed as expected. +Thread pool is then destroyed right after starting running. +Possible stdout of the program is: +\begin{ccode} +Hello from job 5 +Hello from job 8 +Hello from job 9 +Hello from job 10 +Hello from job 11 +Hello from job 12 +Hello from job 13 +Hello from job 14 +Hello from job 15 +Hello from job 3 +Hello from job 1 +Hello from job 6 +Hello from job 4 +Hello from job 7 +Hello from job 2 +Hello from job 0 +Hello from job 0 +Hello from job 1 +Hello from job 3 +Hello from job 2 +Thread pool cancelled with jobs still running. +\end{ccode} + \subsection{Exchange} \label{exchange} The simplest atomic \textsc{RMW} operation is an \introduce{exchange}: the current value is read and replaced with a new one. -To see where this might be useful, -let's tweak our example from \secref{atomicity}: -instead of displaying the total number of processed files, -the \textsc{UI} might want to show how many were processed per second. -We could implement this by having the \textsc{UI} thread read the counter then zero it each second. -But we could get the following race condition if reading and zeroing are separate steps: -\begin{enumerate} - \item The \textsc{UI} thread reads the counter. - \item Before the \textsc{UI} thread has the chance to zero it, - the worker thread increments it again. - \item The \textsc{UI} thread now zeroes the counter, and the previous increment is lost. -\end{enumerate} -If the \textsc{UI} thread atomically exchanges the current value with zero, -the race disappears. 
+In function \monobox{thread\_pool\_destroy}, \monobox{atomic\_exchange(\&thrd\_pool->state, cancelled)} reads current state and replaces it with "cancelled". A warning message is printed if the pool is destroyed when still running. +If the exchange is not performed atomically, we may initially get the state as "running". Subsequently, a thread could set the state to "cancelled" after finishing the last one, resulting in a false warning. \subsection{Test and set} \introduce{Test-and-set} works on a Boolean value: we read it, set it to \cpp|true|, and provide the value it held beforehand. \clang{} and \cplusplus{} offer a type dedicated to this purpose, called \monobox{atomic\_flag}. -We could use it to build a simple spinlock: -\label{spinlock} -\begin{ccode} -atomic_flag af = ATOMIC_FLAG_INIT; - -void lock() -{ - while (atomic_flag_test_and_set(&af)) { /* wait */ } -} - -void unlock() { atomic_flag_clear(&af); } -\end{ccode} -If we call \cc|lock()| and the previous value is \cc|false|, -we are the first to acquire the lock, -and can proceed with exclusive access to whatever the lock protects. -If the previous value is \cc|true|, -someone else has acquired the lock and we must wait until they release it by clearing the flag. +The value of the flag is indeterminate until initialized with \monobox{ATOMIC\_FLAG\_INIT} macro. +A thread pool has a \monobox{atomic\_flag} indicating it's initialized or not. The flag ensures initialization is thread-safe, preventing a pool from being reinitialized. +Function \monobox{thread\_pool\_init} sets the flag with \monobox{atomic\_flag\_test\_and\_set(\&thrd\_pool->initialezed)} first. +If the return value is \monobox{true}, initialization is not performed again. +Function \monobox{thread\_pool\_destroy} clears the flag with \monobox{atomic\_flag\_clear(\&thrd\_pool->initialezed)} after destroying everything. 
\subsection{Fetch and…} @@ -436,13 +462,9 @@ \subsection{Fetch and…} perform a simple operation on it (such as addition, subtraction, or bitwise \textsc{AND}, \textsc{OR}, \textsc{XOR}) and return its previous value, all as part of a single atomic operation. -You might recall from the exchange example that additions by the worker thread must be atomic to prevent races, where: -\begin{enumerate} - \item The worker thread loads the current counter value and adds one. - \item Before that thread can store the value back, - the \textsc{UI} thread zeroes the counter. - \item The worker now performs its store, as if the counter was never cleared. -\end{enumerate} +In the function \monobox{thread\_pool\_destroy}, \monobox{atomic\_fetch\_and} is utilized as a means to set the state to idle. +Yet, in this case, it is not necessary, as the pool needs to be reinitialized for further use regardless. +Its return value could be further utilized, for instance, to report the previous state and perform additional actions. \subsection{Compare and swap} \label{cas} @@ -452,52 +474,54 @@ \subsection{Compare and swap} It allows us to conditionally exchange a value \emph{if} its previous value matches some expected one. In \clang{} and \cplusplus{}, \textsc{CAS} resembles the following, if it were executed atomically: -\begin{cppcode} -template -bool atomic::compare_exchange_strong( - T& expected, T desired) +\begin{ccode} +/* A is an atomic type. C is the non-atomic type corresponding to A */ +bool atomic_compare_exchange_strong(A* obj, C* expected, C desired) { - if (*this == expected) { - *this = desired; + if (memcmp(obj, expected, sizeof(*object)) == 0) { + memcpy(obj, &desired, sizeof(*object)); return true; + } else { + memcpy(expected, obj, sizeof(*object)); + return false; } - expected = *this; - return false; } -\end{cppcode} +\end{ccode} \begin{samepage} \noindent The \cpp|_strong| suffix may leave you wondering if there is a corresponding ``weak'' \textsc{CAS}. 
Indeed, there is. However, we will delve into that topic later in \secref{spurious-llsc-failures}. \end{samepage} -Let's say we have some long-running task that we might want to cancel. -We'll give it three states: \textit{idle}, \textit{running}, -and \textit{cancelled}, and write a loop that exits when it is cancelled. -\begin{cppcode} -enum class TaskState : int8_t { - Idle, Running, Cancelled -}; +Once threads are created in the thread pool as workers, they will continuously search for jobs to do. +Jobs are taken from the tail of job queue. +To claim a job without it being taken by another worker halfway through, we need to atomically change the pointer to the last job. Otherwise the last job is under races. +The while loop in function \monobox{worker}, +\begin{ccode} +while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, + job->prev)) { +} +\end{ccode} +, keeps trying to claim the job atomically until success. -std::atomic ts; +Built-in post increment and decrement operators and compound assignment on atomic objects, such as \monobox{++} and \monobox{+=}, are read-modify-write atomic operations with total sequentially consistent ordering as well. +They behave equivalently to a \monobox{do while} loop. See \clang{}11 standard 6.5.2.4 and 6.5.16.2 for more details. -void taskLoop() -{ - ts = TaskState::Running; - while (ts == TaskState::Running) { - // Do good work. - } -} -\end{cppcode} -If we want to cancel the task if it is running, but do nothing if it is idle, -we could \textsc{CAS}: -\begin{cppcode} -bool cancel() -{ - auto expected = TaskState::Running; - return ts.compare_exchange_strong(expected, TaskState::Cancelled); -} -\end{cppcode} +\subsection{Further improments} +At the beginning of \secref{rmw}, we described how a global total order is established by combining local order and inter-thread order imposed by atomic objects. 
+But should every object, including non-atomic ones, participate in a single global order established by atomic objects? +\introduce{Sequential consistency} solves the ordering problem in in \secref{seqcst}, but it may force too much ordering, as some normal operations may not require it. +Without specifying, atomic operations in \clang{}11 atomic library use \monobox{memory\_order\_seq\_cst} as default memory order. Operations post-fix with \monobox{\_explicit} accept an additional argument to specify which memory order to use. +How to leverage memory orders to optimize performance will be covered later in \secref{lock-example}. + +You may have noticed that there is padding after \monobox{\_Atomic(job\_t *) prev} in \monobox{struct idle\_job} in the example. +It is used for preventing \introduce{false sharing} in a cache line. +Further discussion on cache effects and false sharing is provided in \secref{false-sharing}. + +Jobs in the example are dynamically allocated. They are freed after worker finishes each job. +However, this situation may lead to dangling pointers for workers that are still holding and attempting to claim the job. +If jobs are intended to be dynamically allocated, then safe memory reclamation should be implemented for such shared objects. +RCU, hazard pointer and reference counting are major ways of solving this problem. \section{Atomic operations as building blocks} @@ -530,7 +554,7 @@ \section{Atomic operations as building blocks} Lockless algorithms are not inherently superior or quicker than blocking ones; they serve different purposes with their own design philosophies. Additionally, the mere use of atomic operations does not render algorithms lock-free. -For example, our basic spinlock discussed in \secref{spinlock} is still considered a blocking algorithm even though it eschews \textsc{OS}-specific syscalls for making the blocked thread sleep. 
+For example, basic spinlock is still considered a blocking algorithm even though it eschews \textsc{OS}-specific syscalls for making the blocked thread sleep. Putting a blocked thread to sleep is often an optimization, allowing the operating system's scheduler to allocate \textsc{CPU} resources to active threads until the blocked one is revived. Some concurrency libraries even introduce hybrid locks that combine brief spinning with sleeping to balance \textsc{CPU} usage and context-switching overheads. diff --git a/examples/rmw_example.c b/examples/rmw_example.c new file mode 100644 index 0000000..3a787d7 --- /dev/null +++ b/examples/rmw_example.c @@ -0,0 +1,168 @@ +#include +#include +#include +#include +#include +#include +#include + +#define CACHE_LINE_SIZE 64 +#define N_JOBS 16 +#define N_THREADS 8 + +typedef struct job { + void *args; + struct job *next, *prev; +} job_t; + +typedef struct idle_job { + _Atomic(job_t *) prev; + char padding[CACHE_LINE_SIZE - sizeof(_Atomic(job_t *))]; + job_t job; +} idle_job_t; + +enum state { idle, running, cancelled }; + +typedef struct thread_pool { + atomic_flag initialezed; + int size; + thrd_t *pool; + atomic_int state; + thrd_start_t func; + // job queue is a SPMC ring buffer + idle_job_t *head; +} thread_pool_t; + +int worker(void *args) +{ + if (!args) + return EXIT_FAILURE; + thread_pool_t *thrd_pool = (thread_pool_t *)args; + + while (1) { + if (atomic_load(&thrd_pool->state) == cancelled) + return EXIT_SUCCESS; + if (atomic_load(&thrd_pool->state) == running) { + // claim the job + job_t *job = atomic_load(&thrd_pool->head->prev); + while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, + job->prev)) { + } + if (job->args == NULL) { + atomic_store(&thrd_pool->state, idle); + } else { + printf("Hello from job %d\n", *(int *)job->args); + free(job->args); + free(job); // could cause dangling pointer in other threads + } + } else { + /* To auto run when jobs added, set status to running if job queue is 
not empty. + * As long as the producer is protected */ + thrd_yield(); + continue; + } + }; +} + +bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) +{ + if (atomic_flag_test_and_set(&thrd_pool->initialezed)) { + printf("This thread pool has already been initialized.\n"); + return false; + } + + assert(size > 0); + thrd_pool->pool = malloc(sizeof(thrd_t) * size); + if (!thrd_pool->pool) { + printf("Failed to allocate thread identifiers.\n"); + return false; + } + + // May use memory pool for jobs + idle_job_t *idle_job = malloc(sizeof(idle_job_t)); + if (!idle_job) { + printf("Failed to allocate idle job.\n"); + return false; + } + // idle_job will always be the first job + idle_job->job.args = NULL; + idle_job->job.next = &idle_job->job; + idle_job->job.prev = &idle_job->job; + idle_job->prev = &idle_job->job; + thrd_pool->func = worker; + thrd_pool->head = idle_job; + thrd_pool->state = idle; + thrd_pool->size = size; + + for (size_t i = 0; i < size; i++) { + thrd_create(thrd_pool->pool + i, worker, thrd_pool); + //TODO: error handling + } + + return true; +} + +void thread_pool_destroy(thread_pool_t *thrd_pool) +{ + if (atomic_exchange(&thrd_pool->state, cancelled)) + printf("Thread pool cancelled with jobs still running.\n"); + for (int i = 0; i < thrd_pool->size; i++) { + thrd_join(thrd_pool->pool[i], NULL); + } + while (thrd_pool->head->prev != &thrd_pool->head->job) { + job_t *job = thrd_pool->head->prev->prev; + free(thrd_pool->head->prev); + thrd_pool->head->prev = job; + } + free(thrd_pool->head); + free(thrd_pool->pool); + atomic_fetch_and(&thrd_pool->state, 0); + atomic_flag_clear(&thrd_pool->initialezed); +} + +__attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) +{ + // May use memory pool for jobs + job_t *job = malloc(sizeof(job_t)); + if (!job) + return false; + + // unprotected producer + job->args = args; + job->next = thrd_pool->head->job.next; + job->prev = &thrd_pool->head->job; + 
thrd_pool->head->job.next->prev = job; + thrd_pool->head->job.next = job; + if (thrd_pool->head->prev == &thrd_pool->head->job) { + thrd_pool->head->prev = job; + // trap worker at idle job + thrd_pool->head->job.prev = &thrd_pool->head->job; + } + + return true; +} + +int main() +{ + thread_pool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; + if (!thread_pool_init(&thrd_pool, N_THREADS)) { + printf("failed to init.\n"); + return 0; + } + for (int i = 0; i < N_JOBS; i++) { + int *id = malloc(sizeof(int)); + *id = i; + add_job(&thrd_pool, id); + } + // Due to simplified job queue (not protecting producer), starting the pool manually + atomic_store(&thrd_pool.state, running); + sleep(1); + for (int i = 0; i < N_JOBS; i++) { + int *id = malloc(sizeof(int)); + *id = i; + add_job(&thrd_pool, id); + } + atomic_store(&thrd_pool.state, running); + thread_pool_destroy(&thrd_pool); + return 0; +} From c88b4dca50a8c5181a5fe9c246e617336a3e6efa Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Fri, 3 May 2024 18:22:48 +0800 Subject: [PATCH 11/30] Rename ch5_example.c to rmw_example.c rmw_cample.c is committed in the last commit. 
--- examples/ch5_example.c | 168 ----------------------------------------- 1 file changed, 168 deletions(-) delete mode 100644 examples/ch5_example.c diff --git a/examples/ch5_example.c b/examples/ch5_example.c deleted file mode 100644 index 61f6c66..0000000 --- a/examples/ch5_example.c +++ /dev/null @@ -1,168 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define CACHE_LINE_SIZE 64 -#define N_JOBS 16 -#define N_THREADS 8 - -typedef struct job { - void *args; - struct job *next, *prev; -} job_t; - -typedef struct idle_job { - _Atomic(job_t *) prev; - char padding[CACHE_LINE_SIZE - sizeof(_Atomic(job_t *))]; - job_t job; -} idle_job_t; - -enum state { idle, running, cancelled }; - -typedef struct thread_pool { - atomic_flag initialezed; - int size; - thrd_t *pool; - atomic_int state; - thrd_start_t func; - // job queue is a SPMC ring buffer - idle_job_t *head; -} thread_pool_t; - -int worker(void *args) -{ - if (!args) - return EXIT_FAILURE; - thread_pool_t *thrd_pool = (thread_pool_t *)args; - - while (1) { - if (atomic_load(&thrd_pool->state) == cancelled) - return EXIT_SUCCESS; - if (atomic_load(&thrd_pool->state) == running) { - // claim the job - job_t *job = atomic_load(&thrd_pool->head->prev); - while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, - job->prev)) { - } - if (job->args == NULL) { - atomic_store(&thrd_pool->state, idle); - } else { - printf("Hello from job %d\n", *(int *)job->args); - free(job->args); - free(job); // could cause dangling pointer in other threads - } - } else { - /* To auto run when jobs added, set status to running if job queue is not empty. 
- * As long as the producer is protected */ - thrd_yield(); - continue; - } - }; -} - -bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) -{ - if (atomic_flag_test_and_set(&thrd_pool->initialezed)) { - printf("This thread pool has already been initialized.\n"); - return false; - } - - assert(size > 0); - thrd_pool->pool = malloc(sizeof(thrd_t) * size); - if (!thrd_pool->pool) { - printf("Failed to allocate thread identifiers.\n"); - return false; - } - - // May use memory pool for jobs - idle_job_t *idle_job = malloc(sizeof(idle_job_t)); - if (!idle_job) { - printf("Failed to allocate idle job.\n"); - return false; - } - // idle_job will always be the first job - idle_job->job.args = NULL; - idle_job->job.next = &idle_job->job; - idle_job->job.prev = &idle_job->job; - idle_job->prev = &idle_job->job; - thrd_pool->func = worker; - thrd_pool->head = idle_job; - thrd_pool->state = idle; - thrd_pool->size = size; - - for (int i = 0; i < size; i++) { - thrd_create(thrd_pool->pool + i, worker, thrd_pool); - //TODO: error handling - } - - return true; -} - -void thread_pool_destroy(thread_pool_t *thrd_pool) -{ - if (atomic_exchange(&thrd_pool->state, cancelled)) - printf("Thread pool cancelled with jobs still running.\n"); - for (int i = 0; i < thrd_pool->size; i++) { - thrd_join(thrd_pool->pool[i], NULL); - } - while (thrd_pool->head->prev != &thrd_pool->head->job) { - job_t *job = thrd_pool->head->prev->prev; - free(thrd_pool->head->prev); - thrd_pool->head->prev = job; - } - free(thrd_pool->head); - free(thrd_pool->pool); - atomic_fetch_and(&thrd_pool->state, 0); - atomic_flag_clear(&thrd_pool->initialezed); -} - -__attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) -{ - // May use memory pool for jobs - job_t *job = malloc(sizeof(job_t)); - if (!job) - return false; - - // unprotected producer - job->args = args; - job->next = thrd_pool->head->job.next; - job->prev = &thrd_pool->head->job; - thrd_pool->head->job.next->prev = job; - 
thrd_pool->head->job.next = job; - if (thrd_pool->head->prev == &thrd_pool->head->job) { - thrd_pool->head->prev = job; - // trap worker at idle job - thrd_pool->head->job.prev = &thrd_pool->head->job; - } - - return true; -} - -int main() -{ - thread_pool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; - if (!thread_pool_init(&thrd_pool, N_THREADS)) { - printf("failed to init.\n"); - return 0; - } - for (int i = 0; i < N_JOBS; i++) { - int *id = malloc(sizeof(int)); - *id = i; - add_job(&thrd_pool, id); - } - // Due to simplified job queue (not protecting producer), starting the pool manually - atomic_store(&thrd_pool.state, running); - sleep(1); - for (int i = 0; i < N_JOBS; i++) { - int *id = malloc(sizeof(int)); - *id = i; - add_job(&thrd_pool, id); - } - atomic_store(&thrd_pool.state, running); - thread_pool_destroy(&thrd_pool); - return 0; -} From 1ad39bb765a3868e536a4e9ff691041ccbbefc93 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Fri, 3 May 2024 18:27:34 +0800 Subject: [PATCH 12/30] Remove unneeded comment --- concurrency-primer.tex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 79c30f4..d8bf9cf 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -382,8 +382,7 @@ \section{Arbitrarily-sized ``atomic'' types} \section{Read-modify-write} \label{rmw} -% To add: -% 值得注意的是 atomic type 不僅是 RWM 的保證,也是前後執行順序的保證,那可以把他當作 memery barrier 嗎? + So far we have introduced the importance of order and atomicity. The latter ensures that no other operations on the target object success until the executing operation has finished. This also establishes ordering between operations, as no operations can occur concurrently. From 619b05d7661d228f856bd22f0a5b66e0172d30a4 Mon Sep 17 00:00:00 2001 From: Chih-Wei Chien Date: Tue, 7 May 2024 02:14:17 +0800 Subject: [PATCH 13/30] Add race conditoin example A diff file is added to patch original example to the one that can cause races. 
Substituting `threads.h` with `pthread.h` is also included in the diff because the sanitizer does not support C11 threads yet. How the sanitizer works and how to use it are added as well, followed by explanations of the warning messages from Tsan. The part mentioning safe memory reclamation is moved to this subsection because the warning messages from Tsan mention it. A missing reference to spinlock (originally as one of the rmw examples) in section 9 was added back as a new code block. --- concurrency-primer.tex | 50 +++++++++++++++++++++++++++++++++------- examples/racer.diff | 52 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+), 8 deletions(-) create mode 100644 examples/racer.diff diff --git a/concurrency-primer.tex b/concurrency-primer.tex index d8bf9cf..ba22c5d 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -504,9 +504,38 @@ \subsection{Compare and swap} , keeps trying to claim the job atomically until success. Built-in post increment and decrement operators and compound assignment on atomic objects, such as \monobox{++} and \monobox{+=}, are read-modify-write atomic operations with total sequentially consistent ordering as well. -They behave equivalently to a \monobox{do while} loop. See \clang{}11 standard 6.5.2.4 and 6.5.16.2 for more details. +They behave equivalently to a \cc|do while| loop. See \clang{}11 standard 6.5.2.4 and 6.5.16.2 for more details. + +What if claiming a job, which updates \cc|thrd_pool->head->prev|, is not done atomically? +Let's remove the atomicity of claiming a job and use thread sanitizer to detect races. +Thread sanitizer is one of the sanitizers provided by \introduce{gcc} and \introduce{clang} to detect data races. +Data races are undefined behavior in \clang{}11 and \cplusplus{}11. +Thread sanitizer inserts runtime code into the target program to track memory accesses. +When races occur during execution, warning messages are printed. 
+To enable this, add \monobox{-fsanitize=thread -g} to compiler flags. +The following diff patch removes the atomicity of claiming a job and uses pthread instead of \clang{}11 thread, because thread sanitizer currently hasn't support \clang{}11 thread yet. +Save diff as \monobox{racer.diff} and patch the example code by \monobox{\$ patch rmw\_example.c race.diff}. + +\inputminted{diff}{./examples/racer.diff} + +After compiling and running the example, you will see warning messages printed and same job IDs got echoed repeatly. +The top two sections of a warning message indicate which two threads executed which function causing the data race. +The bottom two sections indicate how these two threads were created. +If the race occurred on a heap block, a third section would appear indicating how the block was allocated. +At the end of a warning message, a summary indicates the type of race and where it occurred. +You'll see that two lines of code, which claim a job, are highlighted as causing a data race on \cc|thrd_pool->head|. + +You may notice that there is another part of the code causing races. +While some workers were echoing ID, others were attempting to free the job, resulting in heap-use-after-free and data races. +This occurred when two workers claimed the same job, as the claiming process was not atomic. +But even when jobs were claimed atomically, this still can occur. +This is a defect of the example code. +Jobs in the example are dynamically allocated. They are freed after worker finishes each job. +However, this situation may lead to dangling pointers for workers that are still holding and attempting to claim the job. +If jobs are intended to be dynamically allocated, then safe memory reclamation should be implemented for such shared objects. +RCU, hazard pointer and reference counting are major ways of solving this problem. 
-\subsection{Further improments} +\subsection{Further improvements} At the beginning of \secref{rmw}, we described how a global total order is established by combining local order and inter-thread order imposed by atomic objects. But should every object, including non-atomic ones, participate in a single global order established by atomic objects? \introduce{Sequential consistency} solves the ordering problem in in \secref{seqcst}, but it may force too much ordering, as some normal operations may not require it. @@ -517,11 +546,6 @@ \subsection{Further improments} It is used for preventing \introduce{false sharing} in a cache line. Further discussion on cache effects and false sharing is provided in \secref{false-sharing}. -Jobs in the example are dynamically allocated. They are freed after worker finishes each job. -However, this situation may lead to dangling pointers for workers that are still holding and attempting to claim the job. -If jobs are intended to be dynamically allocated, then safe memory reclamation should be implemented for such shared objects. -RCU, hazard pointer and reference counting are major ways of solving this problem. - \section{Atomic operations as building blocks} Atomic loads, stores, and \textsc{RMW} operations are the building blocks for every single concurrency tool. @@ -738,7 +762,17 @@ \section{Do we always need sequentially consistent operations?} they inhibit optimizations that your compiler and hardware would otherwise make. What if we could avoid some of this slowdown? -Consider a simple case like the spinlock from \secref{spinlock}. +Consider a simple case like spinlock. +\begin{ccode} +atomic_flag af = ATOMIC_FLAG_INIT; + +void lock() +{ + while (atomic_flag_test_and_set(&af)) { /* wait */ } +} + +void unlock() { atomic_flag_clear(&af); } +\end{ccode} Between the \cc|lock()| and \cc|unlock()| calls, we have a \introduce{critical section} where we can safely modify shared state protected by the lock. 
Outside this critical section, diff --git a/examples/racer.diff b/examples/racer.diff new file mode 100644 index 0000000..9455c4f --- /dev/null +++ b/examples/racer.diff @@ -0,0 +1,52 @@ +4c4 +< #include +--- +> #include +19c19 +< _Atomic(job_t *) prev; +--- +> job_t* prev; +29c29 +< thrd_t *pool; +--- +> pthread_t *pool; +31c31 +< thrd_start_t func; +--- +> void *(*func)(void *); +36c36 +< int worker(void *args) +--- +> void *worker(void *args) +39c39 +< return EXIT_FAILURE; +--- +> return NULL; +44c44 +< return EXIT_SUCCESS; +--- +> return NULL; +47,50c47,48 +< job_t *job = atomic_load(&thrd_pool->head->prev); +< while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, +< job->prev)) { +< } +--- +> job_t *job = thrd_pool->head->prev; +> thrd_pool->head->prev = job->prev; +61c59 +< thrd_yield(); +--- +> sched_yield(); +75c73 +< thrd_pool->pool = malloc(sizeof(thrd_t) * size); +--- +> thrd_pool->pool = malloc(sizeof(pthread_t) * size); +98c96 +< thrd_create(thrd_pool->pool + i, worker, thrd_pool); +--- +> pthread_create(thrd_pool->pool + i, NULL, worker, thrd_pool); +110c108 +< thrd_join(thrd_pool->pool[i], NULL); +--- +> pthread_join(thrd_pool->pool[i], NULL); From 3ea014e7cd39f4e7b4a80852b1b7e28bd82a82d5 Mon Sep 17 00:00:00 2001 From: idoleat Date: Wed, 29 May 2024 19:36:14 +0800 Subject: [PATCH 14/30] Refine the statement on atomicity The original statement is only true on the successful operation. To other failed operations, it is the successful one that finished before failed ones. The new statement does not totally cover the charateristic of atomic operations though. It is the generated cmpxchg or LL/SC loop that make the operation keeps trying and eventually finish. But considering that the purpose of this paragraph is to plot a big picture of order and atomicity, more details on atomic operations should be covered in section "Atomic as building blocks". 
More references to compiler and CPU manufacturer documents should be taken into consideration then. --- concurrency-primer.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index ba22c5d..c5d45d1 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -384,7 +384,7 @@ \section{Read-modify-write} \label{rmw} So far we have introduced the importance of order and atomicity. -The latter ensures that no other operations on the target object success until the executing operation has finished. +The latter ensures that an operation can eventually finish without being interfered by other operations. This also establishes ordering between operations, as no operations can occur concurrently. For two operations, A and B, either A happens before B or B happens before A. As in \secref{seqcst}, a local order of other operations associated to the an atomic object is given as well, with \introduce{sequential consistency} as default consistency level. From 834a856228cfff6d6f2954cd641fbebb35f1a297 Mon Sep 17 00:00:00 2001 From: idoleat Date: Fri, 31 May 2024 01:01:51 +0800 Subject: [PATCH 15/30] Update descriptions referencing to section 5 The intro of section 10 originally referenced the spinlock in section 5. It is now replaced by the new example. Section 10.2 originally referenced the UI thread in section 5. That reference is now removed, as a new example is presented below to explain the relaxed memory model. The new example is used here as well because it is exactly what the original example was talking about. 
--- concurrency-primer.tex | 126 +++++++++++++++++------------------------ 1 file changed, 53 insertions(+), 73 deletions(-) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index c5d45d1..2af6ee3 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -762,44 +762,34 @@ \section{Do we always need sequentially consistent operations?} they inhibit optimizations that your compiler and hardware would otherwise make. What if we could avoid some of this slowdown? -Consider a simple case like spinlock. +consider the example provided in \secref{rmw}, +where an atomic pointer \monobox{prev} in \monobox{struct idle\_job} is assigned an address in function \monobox{thread\_pool\_init}: \begin{ccode} -atomic_flag af = ATOMIC_FLAG_INIT; - -void lock() -{ - while (atomic_flag_test_and_set(&af)) { /* wait */ } -} - -void unlock() { atomic_flag_clear(&af); } +idle_job->job.args = NULL; +idle_job->job.next = &idle_job->job; +idle_job->job.prev = &idle_job->job; +idle_job->prev = &idle_job->job; /* assign to atomic pointer */ +thrd_pool->func = worker; +thrd_pool->head = idle_job; +thrd_pool->state = idle; +thrd_pool->size = size; \end{ccode} -Between the \cc|lock()| and \cc|unlock()| calls, -we have a \introduce{critical section} where we can safely modify shared state protected by the lock. -Outside this critical section, -we only read and write to things that are not shared with other threads. -\begin{cppcode} -deepThought.calculate(); // non-shared - -lock(); // Lock; critical section begins -sharedState.subject = "Life, the universe and everything"; -sharedState.answer = 42; -unlock(); // Unlock; critical section ends - -demolishEarth(vogons); // non-shared -\end{cppcode} - -It is vital that reads and writes to shared memory do not move outside the critical section. -But the opposite is not true! -The compiler and hardware could move as much as they want \emph{into} the critical section without causing any trouble. 
+A simple assignment on an atomic object is equivalent to \cc|atomic_store(A* obj , C desired)|. +In this case, statements above line 4 are guaranteed to happen before the atomic operation, +and the atomic operation is guaranteed to happen before statements below line 4. +However, this series of operations is filling fields in structures. They do not have data dependencies, so they do not have to be executed in any particular order. We have no problem with the following if it is somehow faster: -\begin{cppcode} -lock(); // Lock; critical section begins -deepThought.calculate(); // non-shared -sharedState.subject = "Life, the universe and everything"; -sharedState.answer = 42; -demolishEarth(vogons); // non-shared -unlock(); // Unlock; critical section ends -\end{cppcode} +\begin{ccode} +idle_job->prev = &idle_job->job; /* assign to atomic pointer */ +idle_job->job.args = NULL; +idle_job->job.next = &idle_job->job; +idle_job->job.prev = &idle_job->job; +thrd_pool->func = worker; +thrd_pool->head = idle_job; +thrd_pool->state = idle; +thrd_pool->size = size; +\end{ccode} +The compiler is free to reorder instructions and the behavior of \monobox{thread\_pool\_init} would remain the same. So, how do we tell the compiler as much? \section{Memory orderings} @@ -919,51 +909,41 @@ \subsection{Relaxed} Relaxed atomic operations are useful for variables shared between threads where \emph{no specific order} of operations is needed. Although it may seem like a niche requirement, such scenarios are quite common. -Refer back to our discussions on \secref{atomicity} and \secref{rmw} operations, -where a worker thread increments a counter that a \textsc{UI} thread then reads. -In this case, the counter can be incremented using \cc|fetch_add(1, memory_order_relaxed)| because the only requirement is atomicity; -the counter itself does not coordinate synchronization. - -Relaxed operations are also beneficial for managing flags shared between threads. 
-For example, a thread might continuously run until it receives a signal to exit: +Relaxed operations are beneficial for managing flags shared between threads. +For example, a worker thread in thread pool in \secref{rmw} might continuously run until it receives a cancelled signal: \begin{cppcode} -atomic_bool stop(false); - -void worker() -{ - while (!stop.load(memory_order_relaxed)) { - // Do good work. +while (1) { + if (atomic_load_explicit(&thrd_pool->state, memory_order_relaxed) == cancelled) + return EXIT_SUCCESS; + if (atomic_load_explicit(&thrd_pool->state, memory_order_relaxed) == running) { + // claim the job + job_t *job = atomic_load(&thrd_pool->head->prev); + while (!atomic_compare_exchange_strong_explicit(&thrd_pool->head->prev, &job, + job->prev, memory_order_release, + memory_order_relaxed)) { + } + if (job->args == NULL) { + atomic_store(&thrd_pool->state, idle); + } else { + printf("Hello from job %d\n", *(int *)job->args); + free(job->args); + free(job); // could cause dangling pointer in other threads + } + } else { + /* To auto run when jobs added, set status to running if job queue is not empty. + * As long as the producer is protected */ + thrd_yield(); + continue; } -} - -int main() -{ - launchWorker(); - // Wait some... - stop = true; // seq_cst - joinWorker(); -} +}; \end{cppcode} We do not care if the contents of the loop are rearranged around the load. -Nothing bad will happen so long as \texttt{stop} is only used to tell the worker to exit, and not to ``announce'' any new data. +Nothing bad will happen so long as \texttt{cancelled} is only used to tell the worker to exit, and not to ``announce'' any new data. Finally, relaxed loads are commonly used with \textsc{CAS} loops. 
-Return to our lock-free multiply: -\begin{cppcode} -void atomicMultiply(int by) -{ - int expected = foo.load(memory_order_relaxed); - - while (!foo.compare_exchange_weak( - expected, expected * by, - memory_order_release, - memory_order_relaxed)) { - /* empty loop */ - } -} -\end{cppcode} +Continue the example above, +a \textsc{CAS} loop is performed to claim a job. All of the loads can be relaxed as we do not need to enforce any order until we have successfully modified our value. -The initial load of \texttt{expected} is not strictly necessary but can help avoid an extra loop iteration if \texttt{foo} remains unmodified by other threads before the \textsc{CAS} operation. \subsection{Acquire-Release} From 8d7e2c2df87a19f4723478c071b70e6074585f2a Mon Sep 17 00:00:00 2001 From: idoleat Date: Sun, 16 Jun 2024 20:59:12 +0800 Subject: [PATCH 16/30] Refine rmw example and remove sleep() Static linkage is added for better practice. A new inline function wait_until is added to wait until the thread pool reaches a given state, thus removing sleep() and the corresponding header. The weak version of compare and swap is used instead because 1. There is really nothing else in the same cache line to cause spurious failures. 2. The retry cost is considered lower than that of a nested loop --- concurrency-primer.tex | 4 ++-- examples/rmw_example.c | 21 ++++++++++++++------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 2af6ee3..8552ba8 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -497,7 +497,7 @@ \subsection{Compare and swap} To claim a job without it being taken by another worker halfway through, we need to atomically change the pointer to the last job. Otherwise the last job is under races. 
The while loop in function \monobox{worker}, \begin{ccode} -while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, +while (!atomic_compare_exchange_weak(&thrd_pool->head->prev, &job, job->prev)) { } \end{ccode} @@ -918,7 +918,7 @@ \subsection{Relaxed} if (atomic_load_explicit(&thrd_pool->state, memory_order_relaxed) == running) { // claim the job job_t *job = atomic_load(&thrd_pool->head->prev); - while (!atomic_compare_exchange_strong_explicit(&thrd_pool->head->prev, &job, + while (!atomic_compare_exchange_weak_explicit(&thrd_pool->head->prev, &job, job->prev, memory_order_release, memory_order_relaxed)) { } diff --git a/examples/rmw_example.c b/examples/rmw_example.c index 3a787d7..41ee78c 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -1,4 +1,3 @@ -#include #include #include #include @@ -33,7 +32,7 @@ typedef struct thread_pool { idle_job_t *head; } thread_pool_t; -int worker(void *args) +static int worker(void *args) { if (!args) return EXIT_FAILURE; @@ -45,7 +44,7 @@ int worker(void *args) if (atomic_load(&thrd_pool->state) == running) { // claim the job job_t *job = atomic_load(&thrd_pool->head->prev); - while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, + while (!atomic_compare_exchange_weak(&thrd_pool->head->prev, &job, job->prev)) { } if (job->args == NULL) { @@ -64,7 +63,7 @@ int worker(void *args) }; } -bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) +static bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) { if (atomic_flag_test_and_set(&thrd_pool->initialezed)) { printf("This thread pool has already been initialized.\n"); @@ -102,7 +101,7 @@ bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) return true; } -void thread_pool_destroy(thread_pool_t *thrd_pool) +static void thread_pool_destroy(thread_pool_t *thrd_pool) { if (atomic_exchange(&thrd_pool->state, cancelled)) printf("Thread pool cancelled with jobs still running.\n"); @@ -120,7 +119,8 @@ void 
thread_pool_destroy(thread_pool_t *thrd_pool) atomic_flag_clear(&thrd_pool->initialezed); } -__attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) +__attribute__((nonnull(2))) static bool add_job(thread_pool_t *thrd_pool, + void *args) { // May use memory pool for jobs job_t *job = malloc(sizeof(job_t)); @@ -142,6 +142,13 @@ __attribute__((nonnull(2))) bool add_job(thread_pool_t *thrd_pool, void *args) return true; } +static inline void wait_until(thread_pool_t *thrd_pool, int state) +{ + while (atomic_load(&thrd_pool->state) != state) { + thrd_yield(); + } +} + int main() { thread_pool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; @@ -156,7 +163,7 @@ int main() } // Due to simplified job queue (not protecting producer), starting the pool manually atomic_store(&thrd_pool.state, running); - sleep(1); + wait_until(&thrd_pool, idle); for (int i = 0; i < N_JOBS; i++) { int *id = malloc(sizeof(int)); *id = i; From 138421e4da5c763524132f6c18fdc4e602f817ef Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Sat, 22 Jun 2024 16:45:23 +0800 Subject: [PATCH 17/30] Add BBP formula to approximate PI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Use Bailey–Borwein–Plouffe formula to approximate PI - Reference : https://github.com/sysprog21/concurrent-programs/blob/master/tpool/tpool.c 2. 
Add PRECISION constant with value 100 --- examples/rmw_example.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/examples/rmw_example.c b/examples/rmw_example.c index 41ee78c..2af6bac 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -4,11 +4,16 @@ #include #include #include +#include +#include + +#define PRECISION 100 /* upper bound in BPP sum */ #define CACHE_LINE_SIZE 64 #define N_JOBS 16 #define N_THREADS 8 + typedef struct job { void *args; struct job *next, *prev; @@ -149,6 +154,18 @@ static inline void wait_until(thread_pool_t *thrd_pool, int state) } } +/* Use Bailey–Borwein–Plouffe formula to approximate PI */ +static void *bbp(void *arg) +{ + int k = *(int *) arg; + double sum = (4.0 / (8 * k + 1)) - (2.0 / (8 * k + 4)) - + (1.0 / (8 * k + 5)) - (1.0 / (8 * k + 6)); + double *product = malloc(sizeof(double)); + if (product) + *product = 1 / pow(16, k) * sum; + return (void *) product; +} + int main() { thread_pool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; From 267f1ad2f87906cf5ea57fa263cf73bcb029c0ae Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Sun, 23 Jun 2024 17:25:10 +0800 Subject: [PATCH 18/30] Worker do the job to execute BBP 1. Add the tpool_future variable - tpool_future to pass the result to the main thread. - The mutex lock and the condition variable to ensure concurrency. 2. The main thread sequentially accumulates the BBP results calculated by each worker. - Wait using `tpool_future_get()` until the condition variable is broadcast to confirm that the result has been marked as __FUTURE_FINISHED. 3. Change `thread_pool` to `tpool` to improve readability. 4. Add the Makefile. 
--- examples/Makefile | 6 ++ examples/rmw_example.c | 143 ++++++++++++++++++++++++++++++----------- 2 files changed, 112 insertions(+), 37 deletions(-) create mode 100644 examples/Makefile diff --git a/examples/Makefile b/examples/Makefile new file mode 100644 index 0000000..6fb9f7e --- /dev/null +++ b/examples/Makefile @@ -0,0 +1,6 @@ +all: + $(CC) -Wall -o rmw_example rmw_example.c -lpthread -lm +clean: + rm -f rmw_example +check: all + ./rmw_example diff --git a/examples/rmw_example.c b/examples/rmw_example.c index 2af6bac..a3a8ba1 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -4,18 +4,26 @@ #include #include #include + #include -#include +#include #define PRECISION 100 /* upper bound in BPP sum */ #define CACHE_LINE_SIZE 64 -#define N_JOBS 16 -#define N_THREADS 8 +#define N_THREADS 64 +struct tpool_future { + atomic_int flag; + void *result; + mtx_t mutex; + cnd_t cond_finished; +}; typedef struct job { + void *(*func)(void *); void *args; + struct tpool_future *future; struct job *next, *prev; } job_t; @@ -25,9 +33,9 @@ typedef struct idle_job { job_t job; } idle_job_t; -enum state { idle, running, cancelled }; +enum state { __FUTURE_START, __FUTURE_FINISHED, idle, running, cancelled }; -typedef struct thread_pool { +typedef struct tpool { atomic_flag initialezed; int size; thrd_t *pool; @@ -35,13 +43,50 @@ typedef struct thread_pool { thrd_start_t func; // job queue is a SPMC ring buffer idle_job_t *head; -} thread_pool_t; +} tpool_t; + +static struct tpool_future *tpool_future_create(void) +{ + struct tpool_future *future = malloc(sizeof(struct tpool_future)); + if (future) { + future->flag = __FUTURE_START; + future->result = NULL; + mtx_init(&future->mutex, mtx_plain); + cnd_init(&future->cond_finished); + } + return future; +} + +void tpool_future_get(struct tpool_future *future) +{ + mtx_lock(&future->mutex); + while ((future->flag & __FUTURE_FINISHED) == 0) { + cnd_wait(&future->cond_finished, &future->mutex); + } + 
mtx_unlock(&future->mutex); +} + +int tpool_future_destroy(struct tpool_future *future) +{ + if (future) { + mtx_lock(&future->mutex); + if (future->flag & __FUTURE_FINISHED) { + mtx_unlock(&future->mutex); + mtx_destroy(&future->mutex); + cnd_destroy(&future->cond_finished); + free(future); + } else { + mtx_unlock(&future->mutex); + } + } + return 0; +} static int worker(void *args) { if (!args) return EXIT_FAILURE; - thread_pool_t *thrd_pool = (thread_pool_t *)args; + tpool_t *thrd_pool = (tpool_t *)args; while (1) { if (atomic_load(&thrd_pool->state) == cancelled) @@ -50,14 +95,22 @@ static int worker(void *args) // claim the job job_t *job = atomic_load(&thrd_pool->head->prev); while (!atomic_compare_exchange_weak(&thrd_pool->head->prev, &job, - job->prev)) { + job->prev)) { } if (job->args == NULL) { atomic_store(&thrd_pool->state, idle); } else { - printf("Hello from job %d\n", *(int *)job->args); + void *ret_value = job->func(job->args); + + mtx_lock(&job->future->mutex); + + job->future->flag |= __FUTURE_FINISHED; + job->future->result = ret_value; + cnd_broadcast(&job->future->cond_finished); + + mtx_unlock(&job->future->mutex); free(job->args); - free(job); // could cause dangling pointer in other threads + free(job); } } else { /* To auto run when jobs added, set status to running if job queue is not empty. 
@@ -66,9 +119,10 @@ static int worker(void *args) continue; } }; + return EXIT_SUCCESS; } -static bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) +static bool tpool_init(tpool_t *thrd_pool, size_t size) { if (atomic_flag_test_and_set(&thrd_pool->initialezed)) { printf("This thread pool has already been initialized.\n"); @@ -106,7 +160,7 @@ static bool thread_pool_init(thread_pool_t *thrd_pool, size_t size) return true; } -static void thread_pool_destroy(thread_pool_t *thrd_pool) +static void tpool_destroy(tpool_t *thrd_pool) { if (atomic_exchange(&thrd_pool->state, cancelled)) printf("Thread pool cancelled with jobs still running.\n"); @@ -124,16 +178,34 @@ static void thread_pool_destroy(thread_pool_t *thrd_pool) atomic_flag_clear(&thrd_pool->initialezed); } -__attribute__((nonnull(2))) static bool add_job(thread_pool_t *thrd_pool, - void *args) +/* Use Bailey–Borwein–Plouffe formula to approximate PI */ +static void *bbp(void *arg) +{ + int k = *(int *)arg; + double sum = (4.0 / (8 * k + 1)) - (2.0 / (8 * k + 4)) - + (1.0 / (8 * k + 5)) - (1.0 / (8 * k + 6)); + double *product = malloc(sizeof(double)); + if (product) + *product = 1 / pow(16, k) * sum; + return (void *)product; +} + +struct tpool_future *add_job(tpool_t *thrd_pool, void *(*func)(void *), + void *args) { // May use memory pool for jobs job_t *job = malloc(sizeof(job_t)); if (!job) - return false; + return NULL; + + struct tpool_future *future = tpool_future_create(); + if (!future) + return NULL; // unprotected producer job->args = args; + job->func = bbp; + job->future = future; job->next = thrd_pool->head->job.next; job->prev = &thrd_pool->head->job; thrd_pool->head->job.next->prev = job; @@ -143,50 +215,47 @@ __attribute__((nonnull(2))) static bool add_job(thread_pool_t *thrd_pool, // trap worker at idle job thrd_pool->head->job.prev = &thrd_pool->head->job; } - - return true; + return future; } -static inline void wait_until(thread_pool_t *thrd_pool, int state) +static inline void 
wait_until(tpool_t *thrd_pool, int state) { while (atomic_load(&thrd_pool->state) != state) { thrd_yield(); } } -/* Use Bailey–Borwein–Plouffe formula to approximate PI */ -static void *bbp(void *arg) -{ - int k = *(int *) arg; - double sum = (4.0 / (8 * k + 1)) - (2.0 / (8 * k + 4)) - - (1.0 / (8 * k + 5)) - (1.0 / (8 * k + 6)); - double *product = malloc(sizeof(double)); - if (product) - *product = 1 / pow(16, k) * sum; - return (void *) product; -} - int main() { - thread_pool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; - if (!thread_pool_init(&thrd_pool, N_THREADS)) { + struct tpool_future *futures[PRECISION + 1]; + double bbp_sum = 0; + + tpool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; + if (!tpool_init(&thrd_pool, N_THREADS)) { printf("failed to init.\n"); return 0; } - for (int i = 0; i < N_JOBS; i++) { + for (int i = 0; i <= PRECISION; i++) { int *id = malloc(sizeof(int)); *id = i; - add_job(&thrd_pool, id); + futures[i] = add_job(&thrd_pool, bbp, id); } // Due to simplified job queue (not protecting producer), starting the pool manually atomic_store(&thrd_pool.state, running); wait_until(&thrd_pool, idle); - for (int i = 0; i < N_JOBS; i++) { + for (int i = 0; i <= PRECISION; i++) { int *id = malloc(sizeof(int)); *id = i; - add_job(&thrd_pool, id); + add_job(&thrd_pool, bbp, id); + } + + for (int i = 0; i <= PRECISION; i++) { + tpool_future_get(futures[i]); + bbp_sum += *(double *)(futures[i]->result); + tpool_future_destroy(futures[i]); } atomic_store(&thrd_pool.state, running); - thread_pool_destroy(&thrd_pool); + tpool_destroy(&thrd_pool); + printf("PI calculated with %d terms: %.15f\n", PRECISION + 1, bbp_sum); return 0; } From 389875d7075bf2943da386b3ac5928d975652c5a Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Sun, 23 Jun 2024 22:59:38 +0800 Subject: [PATCH 19/30] Change the implementation of mutex to test and set 1. Directly show the scenarios using Test and Set and its atomic operations. 
- Use `atomic_flag_test_and_set()` and `atomic_flag_clear()` to implement the original mutex lock and unlock mechanism. - Replace the original condition variable wait mechanism with `atomic_flag_test_and_set()` combined with a `while` loop. 2. Avoid deadlock in `tpool_future_get()`. - The main thread must first wait for the worker to complete the "BBP formula" job. - Subsequently, it should wait for the worker to unlock. - These two operations must occur in this order to avoid deadlock. Swapping them will lead to deadlock. --- examples/Makefile | 2 +- examples/rmw_example.c | 32 +++++++++++++------------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/examples/Makefile b/examples/Makefile index 6fb9f7e..2d2fbf2 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,5 +1,5 @@ all: - $(CC) -Wall -o rmw_example rmw_example.c -lpthread -lm + $(CC) -Wall -o rmw_example rmw_example.c -pthread -lm clean: rm -f rmw_example check: all diff --git a/examples/rmw_example.c b/examples/rmw_example.c index a3a8ba1..cb17d47 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -16,8 +16,7 @@ struct tpool_future { atomic_int flag; void *result; - mtx_t mutex; - cnd_t cond_finished; + atomic_flag lock; }; typedef struct job { @@ -51,32 +50,29 @@ static struct tpool_future *tpool_future_create(void) if (future) { future->flag = __FUTURE_START; future->result = NULL; - mtx_init(&future->mutex, mtx_plain); - cnd_init(&future->cond_finished); + atomic_flag_clear(&future->lock); } return future; } void tpool_future_get(struct tpool_future *future) { - mtx_lock(&future->mutex); - while ((future->flag & __FUTURE_FINISHED) == 0) { - cnd_wait(&future->cond_finished, &future->mutex); - } - mtx_unlock(&future->mutex); + while (future->flag != __FUTURE_FINISHED) + ; + while (atomic_flag_test_and_set(&future->lock)) + ; + atomic_flag_clear(&future->lock); } int tpool_future_destroy(struct tpool_future *future) { if (future) { - mtx_lock(&future->mutex); 
+ while (atomic_flag_test_and_set(&future->lock)) + ; if (future->flag & __FUTURE_FINISHED) { - mtx_unlock(&future->mutex); - mtx_destroy(&future->mutex); - cnd_destroy(&future->cond_finished); free(future); } else { - mtx_unlock(&future->mutex); + atomic_flag_clear(&future->lock); } } return 0; @@ -102,13 +98,12 @@ static int worker(void *args) } else { void *ret_value = job->func(job->args); - mtx_lock(&job->future->mutex); - + while (atomic_flag_test_and_set(&job->future->lock)) + ; job->future->flag |= __FUTURE_FINISHED; job->future->result = ret_value; - cnd_broadcast(&job->future->cond_finished); - mtx_unlock(&job->future->mutex); + atomic_flag_clear(&job->future->lock); free(job->args); free(job); } @@ -248,7 +243,6 @@ int main() *id = i; add_job(&thrd_pool, bbp, id); } - for (int i = 0; i <= PRECISION; i++) { tpool_future_get(futures[i]); bbp_sum += *(double *)(futures[i]->result); From d9dd1522f445a355d277a6a71cb615eba8b21b5f Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Mon, 24 Jun 2024 00:10:51 +0800 Subject: [PATCH 20/30] Check if job is finished by the result of future 1. Check if `future->result` is NULL. - If `future->result` is NULL, the job is still in progress. - If `future->result` is not NULL, the job has been completed by the worker. 
--- examples/rmw_example.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/examples/rmw_example.c b/examples/rmw_example.c index cb17d47..96ec90f 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -14,7 +14,6 @@ #define N_THREADS 64 struct tpool_future { - atomic_int flag; void *result; atomic_flag lock; }; @@ -32,7 +31,7 @@ typedef struct idle_job { job_t job; } idle_job_t; -enum state { __FUTURE_START, __FUTURE_FINISHED, idle, running, cancelled }; +enum state { idle, running, cancelled }; typedef struct tpool { atomic_flag initialezed; @@ -48,7 +47,6 @@ static struct tpool_future *tpool_future_create(void) { struct tpool_future *future = malloc(sizeof(struct tpool_future)); if (future) { - future->flag = __FUTURE_START; future->result = NULL; atomic_flag_clear(&future->lock); } @@ -57,7 +55,7 @@ static struct tpool_future *tpool_future_create(void) void tpool_future_get(struct tpool_future *future) { - while (future->flag != __FUTURE_FINISHED) + while (future->result == NULL) ; while (atomic_flag_test_and_set(&future->lock)) ; @@ -69,7 +67,7 @@ int tpool_future_destroy(struct tpool_future *future) if (future) { while (atomic_flag_test_and_set(&future->lock)) ; - if (future->flag & __FUTURE_FINISHED) { + if (future->result != NULL) { free(future); } else { atomic_flag_clear(&future->lock); @@ -100,7 +98,6 @@ static int worker(void *args) while (atomic_flag_test_and_set(&job->future->lock)) ; - job->future->flag |= __FUTURE_FINISHED; job->future->result = ret_value; atomic_flag_clear(&job->future->lock); From 5a060d3f1d9590aab076afc1369c28df8a3f9906 Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Mon, 24 Jun 2024 15:20:31 +0800 Subject: [PATCH 21/30] Avoid the memory leak in the add_job function 1. When allocating memory for future, if the allocation fails, do not simply return NULL. Instead, release the memory allocated for job beforehand to avoid memory leaks. 
--- examples/Makefile | 2 +- examples/rmw_example.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/Makefile b/examples/Makefile index 2d2fbf2..f5a58e6 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -3,4 +3,4 @@ all: clean: rm -f rmw_example check: all - ./rmw_example + ./rmw_example \ No newline at end of file diff --git a/examples/rmw_example.c b/examples/rmw_example.c index 96ec90f..2bdceb7 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -191,8 +191,10 @@ struct tpool_future *add_job(tpool_t *thrd_pool, void *(*func)(void *), return NULL; struct tpool_future *future = tpool_future_create(); - if (!future) + if (!future){ + free(job); return NULL; + } // unprotected producer job->args = args; From 597a90d98c87bf1dea0bfdf5aba8b2fa679b3a8a Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Tue, 25 Jun 2024 13:44:02 +0800 Subject: [PATCH 22/30] Simplify the operation of test and set 1. When creating the future, set the future's flag, which is akin to assigning the job. Afterward, transfer the ownership to the worker. Once the worker completes the job, clear the flag and return the ownership, which is akin to submitting a job. Then, the main thread can regain ownership. By doing this, the main thread can wait directly for the result through test and set without checking if the result is NULL. This avoids the situation where the flag could be set to true by the main thread before the worker starts the job. Additionally, the worker does not need to check with test and set before performing the job. 2. Drop the `atomic_flag_clear` in `tpool_future_wait` function and then directly free the pointer of future and its result in `tpool_future_destroy` function. 3. Rename the variable 'lock' in the future structure to 'flag'. Rename the function name `tpool_future_get` to `tpool_future_wait`. 
Co-authored-by: Chih-Wei Chien Signed-off-by: Wei-Hsin Yeh --- examples/rmw_example.c | 35 ++++++++++------------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/examples/rmw_example.c b/examples/rmw_example.c index 2bdceb7..e300a55 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -6,7 +6,6 @@ #include #include -#include #define PRECISION 100 /* upper bound in BPP sum */ @@ -15,7 +14,7 @@ struct tpool_future { void *result; - atomic_flag lock; + atomic_flag flag; }; typedef struct job { @@ -48,32 +47,22 @@ static struct tpool_future *tpool_future_create(void) struct tpool_future *future = malloc(sizeof(struct tpool_future)); if (future) { future->result = NULL; - atomic_flag_clear(&future->lock); + atomic_flag_clear(&future->flag); + atomic_flag_test_and_set(&future->flag); } return future; } -void tpool_future_get(struct tpool_future *future) +void tpool_future_wait(struct tpool_future *future) { - while (future->result == NULL) + while (atomic_flag_test_and_set(&future->flag)) ; - while (atomic_flag_test_and_set(&future->lock)) - ; - atomic_flag_clear(&future->lock); } -int tpool_future_destroy(struct tpool_future *future) +void tpool_future_destroy(struct tpool_future *future) { - if (future) { - while (atomic_flag_test_and_set(&future->lock)) - ; - if (future->result != NULL) { - free(future); - } else { - atomic_flag_clear(&future->lock); - } - } - return 0; + free(future->result); + free(future); } static int worker(void *args) @@ -95,12 +84,8 @@ static int worker(void *args) atomic_store(&thrd_pool->state, idle); } else { void *ret_value = job->func(job->args); - - while (atomic_flag_test_and_set(&job->future->lock)) - ; job->future->result = ret_value; - - atomic_flag_clear(&job->future->lock); + atomic_flag_clear(&job->future->flag); free(job->args); free(job); } @@ -243,7 +228,7 @@ int main() add_job(&thrd_pool, bbp, id); } for (int i = 0; i <= PRECISION; i++) { - tpool_future_get(futures[i]); + 
tpool_future_wait(futures[i]); bbp_sum += *(double *)(futures[i]->result); tpool_future_destroy(futures[i]); } From 07e0f1c9e12d9d195ff8c3a551b47168aaf74585 Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Tue, 25 Jun 2024 14:38:00 +0800 Subject: [PATCH 23/30] Avoid memory leak in the bbp function 1. When allocating memory for the product, if the allocation fails, it returns NULL. Co-authored-by: Chih-Wei Chien Signed-off-by: Wei-Hsin Yeh --- examples/rmw_example.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/rmw_example.c b/examples/rmw_example.c index e300a55..155438a 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -162,8 +162,10 @@ static void *bbp(void *arg) double sum = (4.0 / (8 * k + 1)) - (2.0 / (8 * k + 4)) - (1.0 / (8 * k + 5)) - (1.0 / (8 * k + 6)); double *product = malloc(sizeof(double)); - if (product) - *product = 1 / pow(16, k) * sum; + if (!product) + return NULL; + + *product = 1 / pow(16, k) * sum; return (void *)product; } From a997d4e0e422c739a25b15493c033d5dbfc29626 Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Wed, 26 Jun 2024 11:32:57 +0800 Subject: [PATCH 24/30] Integrate RMW with concepts from previous sections 1. Use 2 figures to connect concepts from the first 3 sections. - Figure atomic_rmw illustrates that atomic operations consist of not only a single operation but a group of operations that need to perform atomically. - Figure rmw_communicate shows how this atomic group of operations can be used on shared resource for communication. 2. Discuss how to ensure the operations of accessing the shared resource for communication between concurrent threads are correct: - Use Test and Set and Compare and Swap as examples to illustrate how this can be achieved. 3. Compare the usage scenarios of Exchange and Fetch and ... 4. Introduce the concept that we can utilize atomic operations to ensure that a group of operations can perform atomically. 
--- concurrency-primer.tex | 199 ++++++++++++++++++++++++---------------- images/atomic_rmw.pdf | Bin 0 -> 9721 bytes images/atomic_types.pdf | Bin 0 -> 12512 bytes 3 files changed, 121 insertions(+), 78 deletions(-) create mode 100644 images/atomic_rmw.pdf create mode 100644 images/atomic_types.pdf diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 8552ba8..1d9ae0a 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -217,7 +217,7 @@ \section{Background} Initially, any optimizing compiler will restructure your code to enhance performance on its target hardware. The primary objective is to maintain the operational effect within \emph{the current thread}, allowing reads and writes to be rearranged to prevent pipeline stalls\footnote{% -Most \textsc{CPU} architectures execute segments of multiple instructions concurrently to improve throughput (refer to \fig{pipeline}). +Most \textsc{CPU} architectures execute segments of multiple instructions concurrently to improve throughput (refer to \fig{fig:pipeline}). A stall, or suspension of forward progress, occurs when an instruction awaits the outcome of a preceding one in the pipeline until the necessary result becomes available.} or to optimize data locality.\punckern\footnote{% \textsc{RAM} accesses data not byte by byte, but in larger units known as \introduce{cache lines}. Grouping frequently used variables on the same cache line means they are processed together, @@ -232,21 +232,21 @@ \section{Background} Even without compiler alterations, we would face challenges because our hardware complicates matters further! Modern \textsc{CPU}s operate in a fashion far more complex than what traditional pipelined methods, -like those depicted in \fig{pipeline}, suggest. +like those depicted in \fig{fig:pipeline}, suggest. They are equipped with multiple data paths tailored for various instruction types and schedulers that reorder and direct instructions through these paths. 
\includegraphics[keepaspectratio,width=0.7\linewidth]{images/pipeline} \captionof{figure}{A traditional five-stage \textsc{CPU} pipeline with fetch, decode, execute, memory access, and write-back stages. Modern designs are much more complicated, often reordering instructions on the fly.} -\label{pipeline} +\label{fig:pipeline} It is quite common to form oversimplified views about memory operations. -Picturing a multi-core processor setup might lead us to envision a model similar to \fig{ideal-machine}, +Picturing a multi-core processor setup might lead us to envision a model similar to \fig{fig:ideal-machine}, wherein each core alternately accesses and manipulates the system's memory. \includegraphics[keepaspectratio, width=0.8\linewidth]{ideal-machine} \captionof{figure}{An idealized multi-core processor where cores take turns accessing a single shared set of memory.} -\label{ideal-machine} +\label{fig:ideal-machine} The reality is far from straightforward. Although processor speeds have surged exponentially in recent decades, @@ -260,7 +260,7 @@ \section{Background} \includegraphics[keepaspectratio, width=0.8\linewidth]{images/mp-cache} \captionof{figure}{A common memory hierarchy for modern multi-core processors} -\label{dunnington} +\label{fig:dunnington} The myriad complexities within multithreaded programs on multi-core \textsc{CPU}s lead to a lack of a uniform concept of ``now''. Establishing some semblance of order among threads requires a concerted effort involving the hardware, @@ -348,15 +348,15 @@ \section{Atomicity} \includegraphics[keepaspectratio, width=0.8\linewidth]{images/atomicity} \captionof{figure}{A flowchart depicting how two concurrent programs communicate and coordinate through a shared resource to achieve a goal, accessing the shared resource.} -\label{atomicity} +\label{fig:atomicity} -Summary of concepts from the first three sections, as shown in \fig{atomicity}. 
+Summary of concepts from the first three sections, as shown in \fig{fig:atomicity}. In \secref{background}, we observe the importance of maintaining the correct order of operations: t3 \to t4 \to t5 \to t6 \to t7, so that two concurrent programs can function as expected. In \secref{seqcst}, we see how two concurrent programs communicate to guarantee the order of operations: t5 \to t6. In \secref{atomicity}, we understand that certain operations must be treated as a single atomic step to ensure the order of operations: t3 \to t4 \to t5 and the order of operations: t6 \to t7. \section{Arbitrarily-sized ``atomic'' types} - +\label{atomictype} Along with \cc|atomic_int| and friends, \cplusplus{} provides the template \cpp|std::atomic| for defining arbitrary atomic types. \clang{}, lacking a similar language feature but wanting to provide the same functionality, @@ -384,93 +384,87 @@ \section{Read-modify-write} \label{rmw} So far we have introduced the importance of order and atomicity. -The latter ensures that an operation can eventually finish without being interfered by other operations. -This also establishes ordering between operations, as no operations can occur concurrently. -For two operations, A and B, either A happens before B or B happens before A. -As in \secref{seqcst}, a local order of other operations associated to the an atomic object is given as well, with \introduce{sequential consistency} as default consistency level. -Since happens before relation is transitive, just like $>$ and $<$, a global order is established by combining local order and inter-thread order provided by atomic objects. - -Atomic loads and stores are all well and good when we don't need to consider the previous state of atomic variables. -But sometimes we need to read a value, modify it, -and write it back as a single atomic step. -That is, the modification is based on the previous state that is visible for reading, and the result is then written back. 
+In \secref{seqcst}, we see how an atomic object ensures the order of single store or load operation is not reordered by the compiler within a program. +Only upon establishing the correct intra-thread order can we continue to pursue how multiple threads can establish a correct cross-thread order. +After achieving this goal, we can further explore how concurrent threads can coordinate and collaborate smoothly. +In \secref{atomicity}, there is a need for atomicity to ensure that a group of operations is not only sequentially executed but also completes without being interrupted by operations from other threads. +This establishes correct order of operations from different threads. + +\includegraphics[keepaspectratio, width=0.6\linewidth]{images/atomic_rmw} +\captionof{figure}{Exchange, Test and Set, Fetch and…, Compare and Swap can all be transformed into atomic RMW operations, ensuring that operations like t1 \to t2 \to t3 will become an atomic step.} +\label{fig:atomic_rmw} + +Atomic loads and stores are all well and good when we do not need to consider the previous state of atomic variables, but sometimes we need to read a value, modify it, and write it back as a single atomic step. +As shown in \fig{fig:atomic_rmw}, the modification is based on the previous state that is visible for reading, and the result is then written back. A complete \introduce{read-modify-write} operation is performed atomically to ensure visibility to subsequent operations. + +Furthermore, for communication between concurrent threads, a shared resource is required, as shown in \fig{fig:atomicity}. +Think back to the discussion in previous sections. +In order for concurrent threads to collaborate on operating a shared resource, we need a way to communicate. +Thus, the need for a channel for communication arises with the appearance of the shared resource.
+ +As discussed earlier, the process of accessing shared resources responsible for communication must also ensure both order and non-interference. +To prevent the recursive protection of shared resources, +atomic operations can be introduced for the shared resources responsible for communication, as shown in \fig{fig:atomic_types}. -There are a few common \introduce{read-modify-write} (\textsc{RMW}) operations. +There are a few common \introduce{read-modify-write} (\textsc{RMW}) operations that turn these steps into a single atomic step. In \cplusplus{}, they are represented as member functions of \cpp|std::atomic|. In \clang{}, they are freestanding functions. -Following example code is a simplify implementation of thread pool to demonstrate the use of \clang{}11 atomic library. - -\inputminted{c}{./examples/rmw_example.c} - -Compile the code with \monobox{gcc rmw\_example.c -o rmw\_example -Wall -Wextra -std=c11 -pthread} and execute the program. -A thread pool has three states: idle, cancelled and running. -It is initialized with \monobox{N\_THREADS} (default 8) of threads. -\monobox{N\_JOBS} (default 16) of jobs are added, and the pool is then set to running. -A job is simply echoing its job ID. -\monobox{sleep(1)} is used to ensure that the second batch of jobs is added after the first batch is finished; otherwise, jobs may not be consumed as expected. -Thread pool is then destroyed right after starting running. -Possible stdout of the program is: -\begin{ccode} -Hello from job 5 -Hello from job 8 -Hello from job 9 -Hello from job 10 -Hello from job 11 -Hello from job 12 -Hello from job 13 -Hello from job 14 -Hello from job 15 -Hello from job 3 -Hello from job 1 -Hello from job 6 -Hello from job 4 -Hello from job 7 -Hello from job 2 -Hello from job 0 -Hello from job 0 -Hello from job 1 -Hello from job 3 -Hello from job 2 -Thread pool cancelled with jobs still running.
-\end{ccode} +\includegraphics[keepaspectratio, width=1\linewidth]{images/atomic_types} +\captionof{figure}{Test and Set (Left) and Compare and Swap (Right) leverage their functionality of checking and their atomicity to make other RMW operations perform atomically. +The red color represents atomic RMW operations, while the blue color represents RMW operations that behave atomically.} +\label{fig:atomic_types} \subsection{Exchange} \label{exchange} - -The simplest atomic \textsc{RMW} operation is an \introduce{exchange}: -the current value is read and replaced with a new one. -In function \monobox{thread\_pool\_destroy}, \monobox{atomic\_exchange(\&thrd\_pool->state, cancelled)} reads current state and replaces it with "cancelled". A warning message is printed if the pool is destroyed when still running. -If the exchange is not performed atomically, we may initially get the state as "running". Subsequently, a thread could set the state to "cancelled" after finishing the last one, resulting in a false warning. +Transform \textsc{RMW} into modifying a private variable first, +and then directly swapping the private variable with the shared variable. +Therefore, we only need to ensure that the second step, +which involves Read that load the shared variable and then Modify and Write that exchange it with the private variable, +is a single atomic step. +This allows programmers to extensively modify the private variable beforehand and only write it to the shared variable when necessary.  \subsection{Test and set} - +\label{Testandset} \introduce{Test-and-set} works on a Boolean value: we read it, set it to \cpp|true|, and provide the value it held beforehand. \clang{} and \cplusplus{} offer a type dedicated to this purpose, called \monobox{atomic\_flag}. -The value of the flag is indeterminate until initialized with \monobox{ATOMIC\_FLAG\_INIT} macro. -A thread pool has a \monobox{atomic\_flag} indicating it's initialized or not. 
The flag ensures initialization is thread-safe, preventing a pool from being reinitialized. -Function \monobox{thread\_pool\_init} sets the flag with \monobox{atomic\_flag\_test\_and\_set(\&thrd\_pool->initialezed)} first. -If the return value is \monobox{true}, initialization is not performed again. -Function \monobox{thread\_pool\_destroy} clears the flag with \monobox{atomic\_flag\_clear(\&thrd\_pool->initialezed)} after destroying everything. +The initial value of an \monobox{atomic\_flag} is indeterminate until initialized with \monobox{ATOMIC\_FLAG\_INIT} macro. -\subsection{Fetch and…} +\introduce{Test-and-set} operations are not limited to just \textsc{RMW} functions; +they can also be utilized for constructing simple spinlock. +In this scenario, the flag acts as a shared resource for communication between threads. +Thus, spinlock implemented with \introduce{Test-and-set} operations ensures that entire \textsc{RMW} operations on shared resources are performed atomically, as shown in \fig{fig:atomic_types}. +\label{spinlock} +\begin{ccode} +atomic_flag af = ATOMIC_FLAG_INIT; -We can also read a value, -perform a simple operation on it (such as addition, subtraction, -or bitwise \textsc{AND}, \textsc{OR}, \textsc{XOR}) and return its previous value, -all as part of a single atomic operation. -In the function \monobox{thread\_pool\_destroy}, \monobox{atomic\_fetch\_and} is utilized as a means to set the state to idle. -Yet, in this case, it is not necessary, as the pool needs to be reinitialized for further use regardless. -Its return value could be further utilized, for instance, to report the previous state and perform additional actions. +void lock() +{ + while (atomic_flag_test_and_set(&af)) { /* wait */ } +} + +void unlock() { atomic_flag_clear(&af); } +\end{ccode} +If we call \cc|lock()| and the previous value is \cc|false|, +we are the first to acquire the lock, +and can proceed with exclusive access to whatever the lock protects. 
+If the previous value is \cc|true|, +someone else has acquired the lock and we must wait until they release it by clearing the flag. + +\subsection{Fetch and…} +Transform \textsc{RMW} to directly modify the shared variable (such as addition, subtraction, +or bitwise \textsc{AND}, \textsc{OR}, \textsc{XOR}) and return its previous value, +all as part of a single atomic operation. +Compared with \introduce{Exchange} \secref{exchange}, when programmers only need to make a simple modification to the shared variable, +they can use \introduce{Fetch and…}. \subsection{Compare and swap} \label{cas} - Finally, we have \introduce{compare-and-swap} (\textsc{CAS}), sometimes called \introduce{compare-and-exchange}. -It allows us to conditionally exchange a value \emph{if} its previous value matches some expected one. +It allows us to conditionally exchange a value \emph{if} its previous value matches the expected one. In \clang{} and \cplusplus{}, \textsc{CAS} resembles the following, if it were executed atomically: \begin{ccode} @@ -492,6 +486,55 @@ \subsection{Compare and swap} Indeed, there is. However, we will delve into that topic later in \secref{spurious-llsc-failures}. \end{samepage} +Because \textsc{CAS} involves an expected value comparison, +it allows \textsc{CAS} operations to extend beyond just \textsc{RMW} functions. +Here's how it works: First, read the shared resource and use this value as the expected value. +Modify the private variable, and then \textsc{CAS}. Compare the current shared variable with the expected shared variable. +If they match, it indicates that modify is exclusive, and then write by swapping the shared variable with the private variable. +If they don't match, it implies that interference from another thread has occurred. +Subsequently, update the expected value with the current shared value and retry modify in a loop.
+This iterative process allows \textsc{CAS} to serve as a communication mechanism between threads, +ensuring that entire \textsc{RMW} operations on shared resources are performed atomically. +As shown in \fig{fig:atomic_types}, compared with \introduce{Test-and-set} \secref{Testandset}, +a thread that employs \textsc{CAS} can directly use the shared resource to check. +It uses atomic \textsc{CAS} to ensure that Modify is atomic, +coupled with a while loop to ensure that the entire \textsc{RMW} can behave atomically. + +\subsection{example} +\label{rmw_example} +The following example code is a simplified implementation of a thread pool to demonstrate the use of the \clang{}11 atomic library. + +\inputminted{c}{./examples/rmw_example.c} + +%Compile the code with \monobox{gcc rmw\_example.c -o rmw\_example -Wall -Wextra -std=c11 -pthread} and execute the program. +%A thread pool has three states: idle, cancelled and running. +%It is initialized with \monobox{N\_THREADS} (default 8) of threads. +%\monobox{N\_JOBS} (default 16) of jobs are added, and the pool is then set to running. +%A job is simply echoing its job ID. +%\monobox{sleep(1)} is used to ensure that the second batch of jobs is added after the first batch is finished; otherwise, jobs may not be consumed as expected. +%Thread pool is then destroyed right after starting running. +Stdout of the program is: +\begin{ccode} +PI calculated with 101 terms: 3.141592653589793 +\end{ccode} + +\textbf{Exchange} +In function \monobox{thread\_pool\_destroy}, \monobox{atomic\_exchange(\&thrd\_pool->state, cancelled)} reads current state and replaces it with "cancelled". A warning message is printed if the pool is destroyed when still running. +If the exchange is not performed atomically, we may initially get the state as "running". Subsequently, a thread could set the state to "cancelled" after finishing the last one, resulting in a false warning.
+ +\textbf{Test and set} +In the example, the scenario is as follows: +First, the main thread initially acquires a lock \monobox{future->flag} and then sets it true, +which is akin to creating a job and then transferring its ownership to the worker. +Subsequently, the main thread will be blocked until the worker clears the flag. +This indicates the main thread will wait until the worker completes the job and returns the ownership back to the main thread, which ensures correct cooperation. + +\textbf{Fetch and…} +In the function \monobox{thread\_pool\_destroy}, \monobox{atomic\_fetch\_and} is utilized as a means to set the state to idle. +Yet, in this case, it is not necessary, as the pool needs to be reinitialized for further use regardless. +Its return value could be further utilized, for instance, to report the previous state and perform additional actions. + +\textbf{Compare and swap} Once threads are created in the thread pool as workers, they will continuously search for jobs to do. Jobs are taken from the tail of job queue. To claim a job without it being taken by another worker halfway through, we need to atomically change the pointer to the last job. Otherwise the last job is under races. @@ -516,7 +559,7 @@ \subsection{Compare and swap} The following diff patch removes the atomicity of claiming a job and uses pthread instead of \clang{}11 thread, because thread sanitizer currently hasn't support \clang{}11 thread yet. Save diff as \monobox{racer.diff} and patch the example code by \monobox{\$ patch rmw\_example.c race.diff}. -\inputminted{diff}{./examples/racer.diff} +%\inputminted{diff}{./examples/racer.diff} After compiling and running the example, you will see warning messages printed and same job IDs got echoed repeatly. The top two sections of a warning message indicate which two threads executed which function causing the data race.
@@ -1254,7 +1297,7 @@ \section{Additional Resources} \textit{\cpp|atomic<> Weapons|: The \cplusplus{11} Memory Model and Modern Hardware}} by Herb Sutter, a three-hour talk that provides a deeper dive. -Also the source of figures \ref{ideal-machine} and \ref{dunnington}. +Also the source of figures \ref{fig:ideal-machine} and \ref{fig:dunnington}. \href{https://www.akkadia.org/drepper/futex.pdf}{\textit{Futexes are Tricky}}, a paper by Ulrich Drepper on how mutexes and other synchronization primitives can be built in Linux using atomic operations and syscalls. diff --git a/images/atomic_rmw.pdf b/images/atomic_rmw.pdf new file mode 100644 index 0000000000000000000000000000000000000000..9f105d0ada800b8aa496ae164866dd6315ec4ccf GIT binary patch literal 9721 zcma)?2RNKf_wW-XI?;kCS)FKG)aaH~Lo5k`L|d$~MDGMaqD6@w5~33&h%TacqPJ+P zm*}E@D|zz#pZ9(K-*ul*hN2l6mJ5Wf&1d->+aQ$j%i5MXCw zMJOQw5Ku#)ESxL>LKqR85C8xOz>qdh2nWp5#@GoVk1(|}Ll85^mbf2ceWD}O`PEQDw!?S;=17yETnvoru8n}X}ufhTKj!(Hy_0Z1YKBd zpPvKQoJLnkY)mxF`I?UBb3AJ-R~Iw)xvTJr#jkrQyqa1Ea)dIShy-+BMg$9HK6yMc z`6lr!@PK&X)RZ<{R{QSb4$jsC!k`b@(e3=3es1@|Sk8M|9&luRj3b#Lu&f3Lx%nU4 zniiYiNHfXrpyrDWCCV$yB7~{DgBy@%QW$Pu`#Ns|&r6jLHubr4Ue)m<|2^}`kKTQL;0x-mK~ovE)}4K$f(mZ>}QRt(t- zaVG+RWiqM+-kMHRF-ydr76a5T^>m;B_pk^3C{EO7jqzOv*@I*lY7|Hw>w63dXeMl_)ednwt9xh()qn zzM>@6g_N*HF47h=lt8Fo?Hk1pPkuy4yY#Qfx~GJ{Mo#;WSz(8Vk$46N0QJzYqMgdc zvTR;*Pco*abvLh>bF5^Z3x>;NPrt^}A%z!-=r00f}201!aH)EM)Li6~6P?@s}k9m?q++U9&fJ|IB&Pefvr zFt5Lif&VrJ5YTqFM*svK7+W9!>;TX&;}47-5SYF|f7%M%N0=dv|l&?w7;@t>i$Qp3~+LA zM*Jy3FMNT{CU#+hGK~juc%)?sB75k?YHT zL$!mF6k|s<9iEqUN8Uy2>#HBv);@l^?9G*>WoAn^mw)YhF<+(n@yAqhz=Jb^np30f3#8Bu+>Xd&C%WZ3em1HeNa5O~pL~ngDrdsLxrBI&j1|3A_%VfE(?wl0 z;sN|_7)gH6^_rP8()~NZK2`_wCvU7PLIh+Q=uT8`4fm#n+W!>XZwT@^pReMHFAf(Z zdymH7KUek9t}@fZJ|fvqgN8^wr`66ZFL}6s?Lil7k(Kd`?3B3fI|Om4!#x34X+Lz= zbbO5?ql%0&5!ds(w+{H(3F^|UZEH6teJ{!5ug}ot_Lzx}v6#uq?k6OLM@7pQY>*a} zYpN)t_Cv)}lIy51EIvr)*4s3DP9I;_cZm*@8=2BRe&i}TMO{D4=h_ILo;IwTcF@q> 
zit?m#H8$SL&mWu^q%{aK?5(WbE>Wm!y3G}*i8%Y5+FA_u1v9SJJPS*#+@5NY_T3GZu0ML*)2LK+_&)63sjJ~u zocJ9XF-b||&9AtCpZ@S1s--+!aCAv`ySzE#ZVg&=oUd)!?coQLH<~Xy^UB&YTXRuD z-*HtTCuY)z5Cb}UvCQd*@RJ+6Q<8X*68%G12)9Pw0-RtYqbX`rZMs?M2rt^q!K{KA zkIWwd0h^T`WA9rV_r{4glNGbxqDE^6xVZIx!feAyRz2!Z>F9BpdfLNFwehUV?F$ck z;$FGUJ5wjt!p$Xe=HouvL2;{!P9)2jUVBaW4as6}QN>DAExw5AdYQ1sSmUxeR$_g0 zSX3rZyxdB%F;LHYp{g?)PH$-Fn;5w#ND^E=W0iDxU6)CCM8p4}j_c_`I?YsJb9Gm# z7E;KTgPNeC@s@ytM^!ftV1Cz*|0w2qprv6|n>8hMU}LJL@elpJ z)}y)GJ`ayki17Ty@wYXu<%MJOEo4mhNCV&1{Cu8bu({sT^VDajMMtAh4tqC&U|&=d zD=CWu;-61WAkhCv)r;rkC4Yi^BYHO-hd5n%sE}%ERs>Y)l!s;#=bGHofX6)Ij8a{p zsBboKFjRS0eH;5caIwWwSiY5>YHy?ayTH?l$!*G+h>c{s@BLD3&wMX<%=sPlM~l95 zK$I#yybtm+Hf5XynDPuIU3SAEMXXRC)gL~v2<$q#m86pgz!%CSVI>JM-fGdlVdEyL zRuw<7Z5S@-CN%V+&!inUkNjPr)QLWy*k7|%;?+dAD#^7!Oz9+!>;I{C{K(>H@gu6K zZ{vl1vnaO6#xd-uc8VLK;y&&|iE>|Xebo8P_g0Q9AIY)Y#REVr1!3uBC(dl2SHxD#%Ff7MHhyW04QON&vUVQPT-F>NsLa^P z*in?Ucxz#3vCv!H*P3=P*7;z3`>fWC!Euj40%w_wF^gx?Sz}x(h4{H2cKp-Z2z_Cx z1GQr!Yhgj}eOO1wv8Ky;=?V3xQQOJ#rLH3yr-(C`mfcdl7RVjilyigOih%LNr+hiS z`vcFU+Zj>@;Wg`THWJkA0%m5?pF+!?8w4{QB8~`zoue5!L+hM zFU`7}0F6hLlopqbV;~x$w+XGaWWAwyey-o=GJ^_hU11=oCL6f7J!K`-!N)Q_v1W!l z)DH4Yq28%2n6;rKA&6clz(hqrIz6neuvHD9u8&>!Wt zb!+(BG$psT3pFL@Jo)dFB!|DHvN@_m57&9ry`$-WcMzyXbn0yJZe&2H!p=TsLn`-HcYox5zk?FG@&EZvs^I z=I~EX*EkX1R)T3$5_UEYHy#3rDs!~5PH|$ShnSi^oL#HJdov5Rc~VoMuK~R{+?@2d z?Vo?I!tQ&i^Lkv*WQL}a-1he5=bhqR{221Oy^F7(ew^=+rJWt8%Zi{`)o{;=Z3bR& z z;EYEUFI`&~iRnayIW4AqrN14;yC|_(PQOl1XBIPU`%Z^L(42vNtjX98A|K9O^!?Cc`sW~P6{P}ak z#riYrHh5nw(pg_*XrBors_S?rUyFCPwPjoQ+OQ|+0Sm_(Z*62_G#_)+PP%ur86-3e znRdUF;MDDU^;g;$KRUnbL3f&fZh5bpm7X*=S6$O7jn!tOO+KxE#FN;FSe1a$t2Igt{jZkjC9BsI!2tdmoo(E z{jJ`KV0|6SRC`m~-m`5=gSni!h41%ZKKfdN@vR0$xm8-bLkxNz4VGw2XzR@*XS&G; z!H>C;XpPf`%@>cJ)spa=;xJM5z=P-mKuMjfp)v$0stzWzPyV#m-J+DikZ7-C5dcv! 
zJ=CLfhWd7h%1vj@`{>IbCRq!3P}4%8>g23H|GxZ#dHl zMhGhIq{An%v~Bf!`LlT{-+WaWRC$&uM2lxZ+a z)L~i-#2p|W6TZ>C(bw6Nlv>V~FBZDCg%C*mw#AhQ$(+^$<1z3J%3P|&iuqj}1T+&n z2C+9K&2?>C;DF(TYA@#T_wc*$t=|^{-2?3cedXT)GXbK?DQ2lQiDqwClF?~Y39(?+ z6htZ_5%G30jXL2Cg%BSFO(Kn+Re?~w!iBI`6{8@vXx~dEMpyP|N6y zufl1l%cs>Xc>{m4x~Wo>=J|FM+1ty-L|?{~%(pX>y9-%Kt(r}W4JAJ&IOk}$%p!HtL-&fd#B%HUczq<7y~}g87=^>CAxq z>-m29b&@4cnZP6Y2)pv4X~iwYGBeh6c4(+VnU5L-9$)z!%pXME&pi;W$UUUy7Q^Y44C&WK-ZtGSh>ot7bC-Rgw*OwWG zyRKCE4a6FvLwYa1Lzwhla35${iE~m}yh>fi!XW$9;7qJftmt8p^X`wiw@^C2W+zDr zJQLk^ygk!QtKJ?G>(NmG+f~tbjI?zPOJhGgJ{p^!CUK@kb@4n&J`NVxDU;^2EATca zlA1m$8pwGT{2OA~VNUKSM8wMJ?p5%N!MN);kGwJ5(2Wx?Gu$s@Yre!9|WC}b;U-PuQ z3HbQ7|8R!(v)T9TJBN&-jzPzfGc#V9MdZOQl9y-vo45C>OnWmXesa~<@8EeC!43+R zTrHkT#C@L_sZjVlJ@;;J{icqjWn$9&Bhk>|jsj4NK0SL`n|5QmjfnQOaW~A*7E(Qj z+Amu1sjVv`R#aHmYC)!qIr6nI-fb2;UtTw7XE{$S$3FkMAY9J_WQkK3C;7g|f+@jTsWZ*u_QUL15Zr>j4Q z=&@SH+K<98%Pe7m1ye8cBEIkrVDiaEDqX_0^ zwgk1>#LtAnvb*na81Oc#bt7j%Vm+j)wZo>WimRe;l{x8b@`R@2`iR+^x}khcg)#kX z5~*~muf}(l2;x}Qs2_FVSKZ4!H*WI-Q$~5e^pa2Gn`YZ`9ax&1w4Y zzV9rPF);dADqrxX!ixK@;v1`FusOG$dOSgwT2~8Vi8{{1I$%HcEcV-MN^V9a=`6NV z>L}S#%Cet}lDLzM^;jd0`utoanq7jPhN)Gt;*-TzSf-|uMMMeb53170(IMJH51fti z>-!tfq8wzP>f_ZyG}GaS=HtJk|!slq@SanS_OEI#EUWUVVw#hs;XepojR`WxZoNtTz zu~Rn-)O1(!l#W6>o&|oY;Z1qjnmj1WHjP~qS|Y!c!Q}l(u*VFfN9~;v8{@8Eugmag zoI|y=fXf38?h)yc<}WThnGE>HO?;_|dqqURTFd}1kOvcm8P+?yYg=$qN!+lM zX&-hazCkhSIqVWaz_1%QgZ_MC%T~cHm^q!UA8=5wJVp{z3|)5}a)TVsV@VKvVxVjz zrU(Y%&f}P;x37CVQp2tRlx!wREP01BbDpR)Y~@>27<^Hluz0#K**tmrqE+G9go#7G zPQk#(HZY6-zAq1N2qLRdh^_-@0?i*Vl)&zL@pS=N8(#IrYhL4M`9$$286JG=`|zcr z%;`=>C01(Dn(vn1@{6O?%X4VoOQ}YIc}X{cvh(vztTNY~n(dU!V7v9;Yf+;lbbffR zxm^-2KmPy(v^{Ua^$%y!2ZRKLKsj>qs7=)_q#vJOe!>p5 z(hoI)V;8hZr$sh=iC{f9U(-x;s0* z3~V4gIUSH#IJVt=KJbL?4v^<2G^=UaX(9YPCm<})W;D+qE0Mdy>I+S#l$CBDgR#orqb2eVN?nt?0#Uk2G>;1}-QmI>+I`3YMMW%mQ zM*ci`J|^1xB{G^2{zCOR^u`?)N@FLISEbbbaxfn7&(NJO!288M(hV!vOV2n0Namo$ z5%sNj+viYA!G3+*SoOe|fmKeCexLNo(%|}+9DA65XGDFd?>^7I&qmhJ&i{aX+ 
znyf}$qNJdruXl|`I4gDGQJe9TiDk-BE(ds-7p@k#H!Lo*KR#VL-?a3oDQ}#4cC@t* z&q=o^A8dAs<|zAwY?MB)3--O8B8l+GZ8)F*nSN0eQN-n6LYHZue`G}4+D1bH14(Z>vA{l`&ga@v3EQwaLNEme&nLHjaTcUTf z=$W28QBLkRzU_Z8ne%Gl0pk=|uDU-=;!U>141-vAS_X*^(81Y}`+PgsAu-41JirCl-)Dapl0=MyhOvcOHyzBRF zQIQM9C5OtsXee%UW-*o1vlRIzsRR>PDTxtUxV-LN5rBMj`7;8>FWNr&Hg|s$-(4Re zuGOLc$l;>rrsN%FyP)2J5beOwNUutw{i z;&^NbBhzFetkCUucVEFsC)jCYLN!>Jg=;L6sr!>dT~l?bD1Y*rP{30-;|~) zcnSyGDR}hKOUv`yV?x{dIj!60L1voaFZiQ7(#9kVuwUu;=58RYQ*ieu71)A0 zu`o(c;RD9zP=mN8qXMfPX1E;sRC0}ds*;M3ECKv$iN%}$GMjqu0|X~ zAfH9a@L^276~e;!gIQsLp)4mTjGkxk87cUX8N$dD6Rb_?-L9rhsp%}%Yjnqk)pV@U zsaqTD5gQYcDhCUvTIJ%T%PeJ80H^n>E5=5B#d>jTLixE|kbVa+98Ko@%`2R^$Fcvt zdNj=~WNJX*9pFH*(pS*Agb_=X*pA@m5H#_#ffToB#|Xw~-fztO;17w6Hm~{{ZP~=T zls{#^<~3sJQZCgNrc~++dmScQCAh#0#>c0RRh6?RK(cw`D1T}lECWa1E*l)8W5X}P z-DJxt<;EiTxqy5Z_S2j2ppEiUYy>a#m}=hIJVqh9pj25lnky<$d;S0o|3N(eyvV~r zxfV86oTJbB>;a$H+o6Zh6jD6rdoQ&RjHD*P?uT0P*LUvTj+$eMWT)sCC?}_v_=Lo< z60!>CeDtQ=vYD9i)8hwF`2^OFELr&EZZ8Pczw!iT7wKGX$p( zpu}2a%!qEkxz@P$7<8@97xx8;Ap;FotWcC}(vUgj-W{oqht(2K?8erR~YwLod zi`CP|(!Ai)keE%QsPkFFK=*0|qFv(n#!y4%X`}d4F3d^n-wv$OU&d`6If}`#PV}Bf z*ET;yrA|8qaWZFif6oXI{TVvtm3A+#S|48;=>OD=KM8hkrDQ6fz|I^eF%xaOZttTz z!k2X}RzGsBRB3_nvl%CzBk)%G4$V#HRFTrHQ<$IYP`tW!;8_!EfG zc^YBY^*}uH(N%2H>q-0m>r~o`n-r!peW{Y3->T}+7EySbUw$q$qT{ zsfW!BV?OGQeu$saNIa1!zLe=c*B;X7kc=G)SgNy}2%R@jsLC{tG~@Kd?A%WMS1K+Z?*{4e0`q?_;je}HYYibnz&}d} zy26`(P~mStc|}m=5ssz~NP8zchb!Lr3#zIc+hPc*q9#m7QJzO0VP*WtSqtHSH2+%x zW^9YJaR+eyt^@q4y4!=WOEiixO*N*zA?7eo5tw9TC5n^Z(T~ zNC!tJsHL&P9|x!z|5XNoe}m@#bU0@Ge}|9)3X?K|L|Fg?bdV@nlq2$w;FoC~q?wbY zBZfvpKv&cFf8W9oVZi_P{@n_Ksr`Sq|6L0Lg0EQf?^{F&3=kH*0{K@rrs{tig^<`) z&qBf&2rr0vih%$Skt=m!K|##8uTGFZw=fXX9YzNT5EKyw2!S8~K_KXto{;brRKMy) z@b~_}P;4dAMaf3D!aH}nmv044zeBeifawzou@I`Z2&SaAQ|@=#2M zj}y`kC4ZH?z$JeV3rB?l( zw@a8pZwCaI-5y+)`pE8o9Hx6-o$5;+QuQ6K9i<+gsBK9djq;x!2IQtm(mGM_7p&*F zYvCVG)P4FY=kD}fg`oY4r29PQnknQ%eTS@LsANfeam$pg`%!P7eH)eguN?1%2SJ;> zP9m=C2W!AiCI-o`Fp*#GpM&S=8tJq?Pfgt{23MOKt<3$b{-K5C?zMDb$Kx|~hsII* 
zv3mdGM$`L!!#4ga4jXJY#13+V6ko4N44fN7!~#N>{fFr|=Hnzu&Iq$q#Q{WxinN`HYqJqTBX4aR#REH$)RJBmg7RqJhb}YSXhtEF+ep zx!&WE?Sct&l)A3V5_(D2TJ$0OsykRE(FdEXoc^XxfmxNXl_-z(s~}`gY?vFDm&bPA z9sDx0u-a0`PJz>?ajmq0kC9#GefbrWpIo`G7m^bMSiYaaXTbi2oDsI z&L^YaxPBg0;#}t$^dg(Wx&&RN8((%2bPoeC$acerMe)tVhLx4N6YOH&yT!FgVz^zc~xrJ0&n+z zt_@H^1FR`6vO%&E>vmk!BAn&Jvm2K12(s0z<}qp5<}CntD0qG%+r}D4ZJ@^EP*o;m zpN`IXDgD&|HUycMtyrEqWr^^iM7mKiko;vAm6M6cKwA$<)ItRTg?H!RzQf``3wjo5 z&Oq5IC7}X~W4ey1)~wwW7M0sW=ZK+MBQlU|~&@8_Z1oaWXpz5>JrtX59-Utd(5 zfCs#J}?tG_--U)WpnD}0{iL9d6AZI;UCY>8+W5uu6P}6p>xZKgu26dtHcP?wY zrV55C0xg6y94ea>GO{LAONQ+xjJy29HtBOBXRs*LC6Q1r`?-q++!w#g&Gdn5Kq%SC zX+-GdNEWE#t=LRLWpl0SFezdd?*_jQp?Hy!i(4KyC*3JoiQH_Pr0+k>l++&@P7z(X zpOugLTAe0BViQeL=hJOh!7uhmtcUTSb;`PlIoyspShe3%lwERL2>7lk!;c)t4Atx+ z+@$DNy24?tazZkTmJbWKjg^Hc^lTYx<2Um{;d?R7p>j(n<#pUzY7Od@*VoPLF!6CL6lTvX3SC1j-#o}uR* zM)F}WWg<8aM^95NwRpfHa{Bz$i^xFP;%6m>?bFf`XXla%nzGb7qd4Nsk8?N@?WpX% zPWJ?ZvpGeC+-Cgv%#M!|^7Pjg1dXZ{+rEfdYMJ`Q8V-q`#yVcT#>sZ4%hreSYckI> z-7Dd3K~_8zsGaFwS?2NVDL*|WrKjU#S^{x_xt|Q4itPVNU%wNUgolfin#*Id0tg5^ z36BdK1o&70u!)HPKmaxq$m9Mog*{&QvuBgEw{!W2wHXVL1qk5!I|V)(J^uc32L6{b z0Gqm}0~Elf3^9iSXaJz!&Xplf(8sz!f8Wcd05!FQh}wGqbRQ)kfENe?u!BJW{omGN zkA5G`odHi7_)of(am!N&Ks;s!(TpS0@vwGvFy~s{iO2_hb1#z5;mu)vW*RE8r=xe?AvL01$|q zhv(nE3pTtwJ*H>AUbVM;=VXtOOd%ZeNG7xo!mncQ^Vi13tA+o8Ad2?Z$jB!A1p$8T zH}n8%s3A#0Qa;Yq+n5AuBMVVvOu+P9N?zRyZVEHEXJLM`Aa={c;I!r?Q|tr;Zg>#-@U2FrR~vkL;Z zDgkjz!I(>$)kc?Xm72GNk?eA-x_jnh<&^{%_bS{Zo&L%{O`+G}Nx#nfsT|r#M=y%s zzC8TsLY6I!eLlss^_9hEuk6-mn|OSgyU}b0O`PR}$THExnkRb#YnB!}JkrHCn7i@7 zqXf3s>SG4)gcdSApQ{vg+Y~S*`2bi#;DUpEUsl^2XCzBb&{rP3`64LIyX@FOWvKSq zO7is!t??pf3Yq=eZktWL+559r8@vS*f8J0bNen6UxL`64Cy;7vXzUW616O}B!wU+> z)D8+|JTK9ubh2Ryy)G>Rw+&h*`X9qeS`{whxd#(Wq_X@1CAqonwPMXA8mvr-8ipz= z_i{t*I_&fUFT7psMe39wN5ggMTrrvimuP+PDveTwld9}gRsnQ+ul=uV7pol4HY11| z$oRvz&OIxt55Cto^lkkjY4h9xa$1chHwxdZyLUeH4t>2bnHn+F>$K|-6f~pN)S?RS z1BfpnUcVfll4NAFK$CO|`ap1mP&l4-m>{098ycs_P42kfa(Xuye;H|UbB&&VKo;kI 
z?KJ&n?FHt~7xmICgWSIuq(7nLW4_ZQ=*{)O&5Zeo5BwPLVi15}3;Zk?JbO%tOy zeSyA;*{c|&9cg=W8J;TyNhw@LFN-9pp#GPw}sZ*WLp+8pA)Ut-1Uh z&f4$Fn`*`af-`Ejna_6b)|47j0!kRcj-#cVUs+9eu4lGfaEDbyh2l&siUvM$HFNU} zVoP_i>ml{eA!mJHO6ah~UGKy>o&5>fU~kbPl=EP$O`Ga);J-bpJ&szvVT}()-y`svAUn+7`f^?t-8B4_3(wkC4#M+niWr*Csis{ z=tfXY*xubi0tj}Q(FI~|EekHwARrkxY>^ksbXRe)zJ^sV>RM~>IJ~He$YDz-xsKA5 z`_MJJVIQyM%_Vew9(gb##|3}<@tM>sOc&JU*hG8ZR!Fawq3`uY?vV18mpq)OOWgZD zVg{Te%U8FG1q0FJZ#(Q+?^q)~3!fFXt$~ z?>s&^g@)*eY%4Czw>kX$Sz2fya`}s;?ShvV?$j(htgqd&Sl!E`N=z16bs+mwj2ICe zuPEeunBV(LumaW*hM@PetVL;UH)*H0v7UxXN~cE2Q63+riZj4O(dBxUdpyfakelRI zB%zb!0Ahmf!3(8pO9Smh*liu|nrAI8s{7Ot4vJ9^=g;KP5ROJ0+Fn zSr;zPyiB{yxJ*~%IdE4HoBD-^m$%ZCy0vQ1ROL=CHKU2{CIU|7eD_HT8HA>Vl>x4ABCf*(Ld7+of0Q&m03uK6~?EWDY=2J?}T}Y5l?s(+ctf z9`K4FO0V08B*CbDSHtX;D-4_WOTD?ls(aT8sk$9fikfSn&57OADb3}pmGhBOuKS*t zYd)i|xSP7~fpQVle0^1Dq1D4Y-IGa>tz!=gY@T?@xV$ub>%BV65-P9!kv>|v!gKC4 zQ9pKG+;u#bW*S{PWSK=LR0^DKPtgo%T?gtp6C0kGvg*qu2gqwE3VWIM3Wt2^(OUB! z(0|C@@80ywMKGi3%`9T0(3i{@FJk+^V0km748j`agcUOid20GC$cyrN0egsoq3(p^ zNQpbCAF#r()A0%ycagioaKh&b_kvh!&i>YehcnV@kB@wIjeXO4T2fO=G0v2Sm(JK) zpBY<2v^kC`-FC*AZ@JzjUZ(g%Yh_5in~>F-VTJz9P=!XRTXN;dXZ#aplF#^CBW2*k z)UlMC;SG%w?^)3_G%$Hr`&ooPwL@NTGDBGx=X-@Mi8+y67lZunZo)mttPcy^k?n~Gyn&sH zslkdqOJJ8Z-^L}HCXP1QeoVW;7*FN!OBB&qaqg;m5(s2eg-bouV82xn(nYgsI0i&4+5+<2z_>iR^3N#3^OLE3Q-b zed5u^cd7I#<$uQ#u^9S>Xv`SjUzEiDM_ma-YQ*=_AWPFDr_0GAWR4=1^SlPVE z+-k$?T_dw&ah19rBPYGw*Zjj?ognk}1_Q%A?LAvrn}pPOt+#7G9Cwo3DYKODBKPEa z8z!HVnB%7DyozX^YbNJ`7pi^vuv#=1+<4ID2@UadJG*D>E82}a#waNrQL?))C_(9L z`+gf?%bog(HgS$C*@A~B{HIQ0p3{4`07FUs76nzN{WVQoM%C?lw^a9XgZ90_dh1PV z_R{0EpAaOPD6Nq+bIIYwO^jh_fk?`p_7|5){+uRxcxxV`q-&m|xm6^kOe9|-&n)aB z`zVW7lamd4MI0hfVZE$KPEMR!cIB>$O4#+K6SrNW!Y7NX6fC}vHHryXXnhZR_2Brd zc|Xj6&?C_k$mQ0<-PiEJABt=JJkov0PB?J|y~VrfY(ZNmVQEaTM?VjDC%0MU@Mh7Z z4&=q4wVQF6k)trN$ImBRxd9I@YhVfGq*?T_RLzMUf>2KbY*w#yyDxg8rpbn%xt%5Yeh?>A#2!rW+D$>ula=P>UlhUnJ=#T3H@K9XpbZ_%h`-$LW z)g}`P>kB5jikSzP7Yb3lTi4zc0Ojqbn2}IjL`s`K=T!N%UbaX;9DA$FiMec-+AKfM 
zlrRFSnvO2^n#Rrv;x$~?(s>L0PHvWcPUq$u7P_Ft;7#W)eGDSopYK=f#@(4B5%4L? z*k`dO*7Z1-QHi458gvCvyT#~$skc-LLT^F>ylnfgM8m{7-0&H#8)ddrKc9NNAWli- z-uEMIyZ)jH$db3MT>b35@q*MV7xCj5VoS3tOPL9F2`8AB%_;1KfNW0cZ9O1v^vDfa zap5H|4Qt>pe;YDRVZuOLZ=8hc7c-;ao*|W$Z;8UsV@TmE;9?3EXe?`3EWve6dBRL$ z&?KUmz)wUDX}r`XObc#u`r{Hz(g$b)tM_v+9TPmW*EUhh%gZZoUA~oMhx_gS{61n` z%8&7#{k7-1MIia@;f!nTdjmx&Wo+6$(*;q;Tzrx^ymASK|VJ67EVof7DOnXbO*R{f%C-fkV; zCaqn@iq5R4mp1)OGht``Ma<#yJ_{8ZGvll*5qh+B0l1vCZ1-<4yznSLSZ za#S5vWs=i&mQ{2li};{H9Dd}g;a;b&K10xM;D2rmVR9fwDK6^m&oU0AUrC}zS*zN9 zg{lvnd-sDx^n@c!oi3Ut!-+|26ytdgd({XRZWU3Owo7z6zjQoZR|?4sWD4_si61>? zHl-#%)Wh08348uf2hud!%Gy~ts)4boNxJTiE#zzC5_c&svwwwB6{}F1&n2J$o+03} z_Birsy=pg6UEdy=w6~v8NnPq1*xXwFu}3|Aoj;(t8t%C+Tlm`6*Jd7jc&dg5pU(^@ z6t1_CM_;kx-G*Kj@vt)henffSD13qko!29H1tEH0l9etA6;qJ9ADf10YZ*TMbDk}e z%0s?m*SY5eI8=psYlBkneQMXp4?Jg*u`^N1^nQ=$QAq(cJ2tesFjS5gV%LN89Fx40 z7nqE$Ba-nIz30Ye$gOdWLB;II1?2bMcWZlQcgvP-WZ$e4Bl$yg9Z{19IKW#Jf$NDA zjA&?_NBXTzAEQ2jET@t2zd?9Ty`u}iV36ARxmhh7bQ5%yF zwTi>H()c%U?fK$W;m(b_UDSALSF_d!1Elc8mOp%e+>NV!S=sN)f z%)x)fXJUgSUJpGFX-+B8p5{gBTx$D)xR>HKRsWiq=;z#A)QGcCliTnDH%24p^6SRC zu7bm+2C`FWtczDsgr1?0fhZhAyJP!})Vkw>x(Eh^Wa2t75e_(5;9WwC^KBTAg)MeUvZ^OiYSeHP$tWmpriw+~+(&W6I{z#EX4()wUU2 z_Nhnm-GmRx{!@dosP*S&K} zR?iOXR%t0kfzZ6vr2Mi^Y}Mi8IdnmYy1B~Y;qq)o*NR=p=SE4Z@_wf|>h$E7q2c?H z*Z#4Fa5`6Qj+NJIu~NhMBbmuxhVgYTd)BD0khW>a?|PcJamLd)QiG5Z%gU&ugUc2P zSD)(}Hw7D*ilu11@>L*NmmYa>f;llq(IImGo=%H+l8T!jM`mnfxRcHx2aXjk)}ryX zKK>RWA7UB0{Ahuz(9mwMc%hVaj*^6K?l38L%NMs??<`l5>;$?^8PATkx6VI!chZjZ zx|555FXq4RRLJ)33<>WoRA@~Qz7tM)Sm?N;t#2$VI5}yjU9K-IJJtA<@h$g8W+#Vc z%h%^DqJ)ugZO-*e)N3d+VdDhAIcV{;0%%AWBa0CL;ql5;NV%sDScQb@u3}$ zVf=^nL@(&8pU`#arF=&Ee2ZVEOryO0>uRe?cy4aGk@SZ!j04N;OzyeZ(D@D2l_*1h zeY4f~py`S|dQ~lml;Z-!KC=|a%(tg?;U(&@`&hHuw`jq*@Sn^Wi5Y3{Om{Q!-E71V zvaUnABPaFeRQ3lWaRvv*7zVUI)>4L4(;fw57HdzaH0GK#=ri<7&Mn3VwvZVo#BKCD znZehJYOj_}`-((G_nsjF-C2&;vs76N-U;|X+lmpm)m>fWZe}^@v~ghB_W1OXSq_#z8mTI zBf=@m97{y+=zcsKv(po7tT^)b9=l(kw)fghGQYBU(^lOAFY|y)jx5tt&>373f`pR zv7EI${O3#jqh{6d& 
zk0KBr;0;>D?U_(g<6pUNFk(p=bd|jO1|PyhvG-ob!h~PTU4p@AUKIwu#bzgeBeXnp z+BORH8yS98aPQvoYHN4Uf%i!xtQFA(EUTc!A)`mZuVd$%6PNkLZRxG5l2{E+d&8^` zCH6u-m&LKdy_xeJW+n(mHeem2^B-wm{=6mKw%d}&cHd;h)Q@So7 z{_KWD?k3(t2Wxgf?o&^VILh+I$%DLz63t;f4i1|+!_4kYD?dGnC?k4Cfn>VSNiRN~ z;+l2g@ZKiz`2C!t&+|g7b_WQq;zs#k z%nwyl2^9_0tUkmqA$=i>epjqPuT!cFO!M+7t5{Vf-nm5^jIpv ze%5<3;_umD1b3R|l4Onj_teqU{CDF->9cNM!**9P;$ULswHhnpi?-zPP^V;j24Ld` zXMbs?{s2(GLsHA-Yf65;^I_kKk2>1iLKH&Xs*Pa^=SB%U9(=`*dwiEFUR_!>>x$4N zyd5!RJ_6Q_vTY!<(06=jRFsV)(DFx76=7Hz%&N(AnZ>lqQr~V-VswjF#gKccf0mcD z2Pz)j*Z|34Q62|&$)v@)pVW}k7_-UGirsj2X33>cecEw{=E$eXKx3Hs7L#`v^Sok( zRPCh>|@RcFfN=}uy|H! z7i126EN=ugYn?}sf#kr)HNKqsSRRjA$12b^uOZe@8eAsg7!Ms3FMvv#K@}0;`(b%i z6k}pHa6-aONyJ6cLQa9!8&w%RVbu!&n@*_%^fIn%P0P#8o~1pyJ32*RXVW_~KXqLu zfmYYCfXq61ZC#$wYoknFkRWN<1M!)k6sp5+!KUHe?LMywf$N-3EU;%t4U^qv!Aw40 zheQ9R?sJupMI)KFdx7zbf$(Z9Bx9?1Z#^@n5cX0&?;3%#?5-Mr1QDwANyBlm75xo~_nqxd~nc#=F-)A=0^*Ob3>X8qf0c34vqX<;S1z z@jg5E@am9?halLdZ$q&{JO^dcI6gbWF*(I-8@Z17?v-MdqrL_Bt>b=T*;NhvTx#h> zyBkpZRvx4{xq(&rF(V1+>lbq_Q}s<73?O4CM4Hx zzi~ff55KSTN(ua}`}XS!lLFV~jdDtMwiBZ6VTMV22!cA^YweaZvhQBR$(dO7jB6(x zL*`ec|*7(YCbX zYD|i6=sZLqOb9}o`&he$G_WC3KB|AngfXb?S|m`LHXJ?VB+4M_q?7P|@-v&JwiNZ7d z_lQ^f0&nVEz0Sw83pFc!zc>3^jvid>Mrn1Dpy+wr&t~e7T~0TAZq+fY_|~>DaKS`Y zYc5(gh~flDA;P%9$?GD-*NL?I1Rm~^J2CMTikP_;u^&Vn0-F@qK)A!-E6{#G&Z^5)MD@#`BK|b~XBV!IWZ}*X>95O$QsG`lg<~ zhmvfjD+rujcsGB2k8x*&#q!pbFL){7Qixy-!n=lkX{u|isq90GRMRe##H^N zm-|NdC`)9UhtG(V{V3av;0mS%#+H7;2aPY3^#|RA&@HS^FjvmII?e!7W>JBPKm>s~ zyXBma13KNUk|d1Rq#5e)X9CFX%lN`kwa3r$!qrz)lPKO{d{l98qbbQ5%97aUTF5Yb zgO2kRY3wM4%`3_)X)uTCCyc-suRd>whi4X;{kpI*k%194>vW0Z20vhs?}n`{(6|wh z;D%VlUaneEF?6+lGgS{xhz@lKIdQO~B8p<`QtN_5);M-2607PIhfh>%uJPw*H`0c9 zZ{CY>=Q(6+s!#5gVUV`EmgdDNn$k9cWy-08?DN?t?~f~4@3YMhzr{={@6A&MyDz$m z&pGgLXXBuklcfcpQ3YR#I|_TPe`|=$CRa#AyvK$2oVQ!*PVMMy+43|#hd844CQUsf2%JtwIYwIqANQ(6U@1I6*;EW7K5tAn5U_7JXOoh-SF;4U1bcD}> z4nADmhTXrl{>L)-M`dHZKS~7qjLk2gcIZ z_pPxuZNl5|D9j6@r-u4vLeoC>pJ=&kdc?Km#G0s_vd5Be4h+#=7t2H@tvl>VZyvi& 
z^2LyV2R=d9C1TrFD96YhfMa{>{z13Yvc4o#WBDI_R8IHVL7UpK0xzkAZQMY@!vOEZ z55r4nli-AXm3|Db=tXPQq($26d)9Z?zTmn1kLmn+!=q?5&-xA=qDc|jNjsYK>E^Y@ zM_4Cq#+A%J+Aa^7Hzb?x;YG})h&0%^fps}*HVC-T0+w#$`fc1|M!o>Sl2b~d>&5IY zY9ru+o#%7sO+7ZM1GGU)NK(W%u`l+NfW-1OzKa}YcgQuq@a=ZC2(@-Z+UDcdA{F(7 zrXyDI+OeWDB(1J}O%j!w1F|0Hg9O(GLY|Xy85Xora;6f@ePzIb(`J;~1tl`wO=GgK z7%1zhMeeXbNwj1#2S=<|JTvY`WJ9r>VwpYU2p|zcKMRkJOh(WVg~1?k_KPq zIl&9IQU)apMhtUpe?U`34;rkDeH}i0`Z1X{84^>o)1Cy5l|Pao0}XJ*3J4^~>q{0S znVveRTE&FOgLj^Lj#IxhUWZM8LnkYHn3x;`UH456@0=x&J&f&7A|Om3WY0|HoCZvFYx)xyL1>^5{6ZrMLPY3Q*@LFh-yYnxz=S0R2(FKOxlJ)*3S5jO z6o^D4Dq;`niwSrpZkDBpKy(fhea_~@epeWshJPlxqQLOy0L)0F3BQn)3x;K)dx62S7f zo`YM4eOo=_tr)Vr2N5#725sCcQS)tp0CQ2y3JRYDH3LF=nq-V=Op+qp{To_fz|Yl+ z`!_L0*yIA!=Gbhe!!4s8C7~6T;T10zvp6G~AZa(xRUn=kvGi4H!PtcBiK(He70vQ+ z*L-X80L37;B=NvV)ALeV%5bbiEDA{vMW{!*10y7v{xd5T+MB3QY$7|!D{2QdS!xeB z5!z2*9Beiu)6O0a5brwiR+fDMgLmXHF;-3gO|v|~6@SqzJRDq{e*-4G|BYdQp2(5^ zKN?d%=9Aq#5H0w~7lm|Q4DzKW4-W{3b3hZ~_PIvTpJk#rK#~h*>tiGNKP5$ z1hQWdMN)(DXINZ@zAKe@LTt0kEf5dhhLHEzV-PUo75ycL5`a^ni39?TW8) z)_o$?L;lVa?u=Ra8AkUc7!vsu|(d4Y-qAv}sdo}UAyF$4qXAS?VQET?rT=8v@?n$$lW z2mRXajK9}i7z#t{n$1j5`Ivl{QSHIyj;MK^>{~rC{ zWBS+da{zh&9(~XgIrKM$^N0O;f|JCd&L&Ql4lediPbklCUP%#R`v@P&YRYJ9DKLpc ztst7NYEUOjvws;#LToKLL0JDCjz8788oT@ku+$&1 zr>9W;Rz)Gs(5LGD@6cE}IlG8iK%D*#fIQ@{G6?*K;rjQ0Keqp0vroeA5r4L{GY7C~ zS=x!%Ia~fM{C2HnY3gF({0MrngPywa&y}4Q2zc85xpJ{{0sgP;j~)A?#nTxN`#mpHy^OdXABCqgUJjn$Di`pv>PK53fPEE& za{6@k8*zI?)$BFwET5=xz*A-a9NYhX)Spl~=ijlEvow8* z9_XJip0bm@sjJB&sZMVKv2?NrfLK9XtUv(0g^P=WGauU{_4lZnJ3$;QEKQtQ?VZdS z{!9k3M~>aa(%w$|i5R38=K}-596&BG7myeH2(>W*dFX&ZhJQN!AKdV70#FR{I6dsm ze>ePh7yjb^{->oLPEa$H$5G%w0sd4z?_vxMRv;ge>(tscy&c`VI zhsOOhqW`V|fuP5*|GNg_ej2`i(?B2&_D9t4-!vc?_&DqTT?0J^;D6B`pQ-<_1A*B8 z%MSF&X8xBQFZX}84+MUU@c-DkI6)p?08YREgP>;V1$~Ou)2mL^-u`jW|FDWrWf?oO g$FY69@z0~=>;iFe`5oQI*~{_xhXxu6B}tV32NkAwbpQYW literal 0 HcmV?d00001 From dcb8f9a2822946a7f2f4637a052eb9ad578de13c Mon Sep 17 00:00:00 2001 From: idoleat Date: Wed, 26 Jun 2024 15:56:10 +0800 Subject: [PATCH 
25/30] Remove the thread sanitizer usage and diff file Introducing thread sanitizer here may be an unexpected pop up for the readers that are new to concurrency. Here we focus on rmw atomic operation instead, thus related content and diff file are removed. The proper place for this topic could be a dedicated section for "testing, debugging and verifying concurrent programs" This aligns the decision sticking to C11 thread as well. Co-authored-by: Wei-Hsin Yeh --- concurrency-primer.tex | 28 +++++------------------ examples/racer.diff | 52 ------------------------------------------ 2 files changed, 6 insertions(+), 74 deletions(-) delete mode 100644 examples/racer.diff diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 1d9ae0a..79dd3f3 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -550,28 +550,12 @@ \subsection{example} They behave equivalently to a \cc|do while| loop. See \clang{}11 standard 6.5.2.4 and 6.5.16.2 for more details. What if claiming a job, which updates \cc|thrd_pool->head->prev|, is not done atomically? -Let's remove the atomicity of claiming a job and use thread sanitizer to detect races. -Thread sanitizer is one of the sanitizers provided by \introduce{gcc} and \introduce{clang} to detect data races. -Data races are undefined behavior in \clang{}11 and \cplusplus{}11. -Thread sanitizer inserts runtime code into the target program to track memory accesses. -When races occur during execution, warning messages are printed. -To enable this, add \monobox{-fsanitize=thread -g} to compiler flags. -The following diff patch removes the atomicity of claiming a job and uses pthread instead of \clang{}11 thread, because thread sanitizer currently hasn't support \clang{}11 thread yet. -Save diff as \monobox{racer.diff} and patch the example code by \monobox{\$ patch rmw\_example.c race.diff}. 
- -%\inputminted{diff}{./examples/racer.diff} - -After compiling and running the example, you will see warning messages printed and same job IDs got echoed repeatly. -The top two sections of a warning message indicate which two threads executed which function causing the data race. -The bottom two sections indicate how these two threads were created. -If the race occurred on a heap block, a third section would appear indicating how the block was allocated. -At the end of a warning message, a summary indicates the type of race and where it occurred. -You'll see that two lines of code, which claim a job, are highlighted as causing a data race on \cc|thrd_pool->head|. - -You may notice that there is another part of the code causing races. -While some workers were echoing ID, others were attempting to free the job, resulting in heap-use-after-free and data races. -This occurred when two workers claimed the same job, as the claiming process was not atomic. -But even when jobs were claimed atomically, this still can occur. +Two or more threads could have races updating \cc|thrd_pool->head->prev| and working on the same job. +Data races are undefined behavior in \clang{}11 and \cplusplus{}11. +Working on the same job can lead to duplication of the calculation of \cc|job->future->result|, +use after free and double free on the job. + +But even when jobs were claimed atomically, a thread can still have chances holding a job that has been freed. This is a defect of the example code. Jobs in the example are dynamically allocated. They are freed after worker finishes each job. However, this situation may lead to dangling pointers for workers that are still holding and attempting to claim the job. 
diff --git a/examples/racer.diff b/examples/racer.diff deleted file mode 100644 index 9455c4f..0000000 --- a/examples/racer.diff +++ /dev/null @@ -1,52 +0,0 @@ -4c4 -< #include ---- -> #include -19c19 -< _Atomic(job_t *) prev; ---- -> job_t* prev; -29c29 -< thrd_t *pool; ---- -> pthread_t *pool; -31c31 -< thrd_start_t func; ---- -> void *(*func)(void *); -36c36 -< int worker(void *args) ---- -> void *worker(void *args) -39c39 -< return EXIT_FAILURE; ---- -> return NULL; -44c44 -< return EXIT_SUCCESS; ---- -> return NULL; -47,50c47,48 -< job_t *job = atomic_load(&thrd_pool->head->prev); -< while (!atomic_compare_exchange_strong(&thrd_pool->head->prev, &job, -< job->prev)) { -< } ---- -> job_t *job = thrd_pool->head->prev; -> thrd_pool->head->prev = job->prev; -61c59 -< thrd_yield(); ---- -> sched_yield(); -75c73 -< thrd_pool->pool = malloc(sizeof(thrd_t) * size); ---- -> thrd_pool->pool = malloc(sizeof(pthread_t) * size); -98c96 -< thrd_create(thrd_pool->pool + i, worker, thrd_pool); ---- -> pthread_create(thrd_pool->pool + i, NULL, worker, thrd_pool); -110c108 -< thrd_join(thrd_pool->pool[i], NULL); ---- -> pthread_join(thrd_pool->pool[i], NULL); From a6c68a90ee5b80cba42f22cdd313182343194d68 Mon Sep 17 00:00:00 2001 From: idoleat Date: Wed, 26 Jun 2024 17:27:04 +0800 Subject: [PATCH 26/30] Restore previously removed spinlock example Since spinlock is added back in section 5.2, the original content is restored. Same as the rmw example, the goal is to provide easy to understand example first and improve it later on. --- concurrency-primer.tex | 54 +++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 79dd3f3..7457ee0 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -789,34 +789,34 @@ \section{Do we always need sequentially consistent operations?} they inhibit optimizations that your compiler and hardware would otherwise make. 
What if we could avoid some of this slowdown? -consider the example provided in \secref{rmw}, -where an atomic pointer \monobox{prev} in \monobox{struct idle\_job} is assigned an address in function \monobox{thread\_pool\_init}: -\begin{ccode} -idle_job->job.args = NULL; -idle_job->job.next = &idle_job->job; -idle_job->job.prev = &idle_job->job; -idle_job->prev = &idle_job->job; /* assign to atomic pointer */ -thrd_pool->func = worker; -thrd_pool->head = idle_job; -thrd_pool->state = idle; -thrd_pool->size = size; -\end{ccode} -An simple assignment on an atomic object is equivalent to \cc|atomic_store(A* obj , C desired)|. -In this case, statements above line 4 is guaranteed to happen before the atomic operation, -and the atomic operation is guaranteed to happen before statements below line 4. -However, this series of operations are filling fields in structures. They do not have data dependecies so they are not necessarily executed in some order. +Consider a simple case like the spinlock from \secref{spinlock}. +Between the \cc|lock()| and \cc|unlock()| calls, +we have a \introduce{critical section} where we can safely modify shared state protected by the lock. +Outside this critical section, +we only read and write to things that are not shared with other threads. +\begin{cppcode} +deepThought.calculate(); // non-shared + +lock(); // Lock; critical section begins +sharedState.subject = "Life, the universe and everything"; +sharedState.answer = 42; +unlock(); // Unlock; critical section ends + +demolishEarth(vogons); // non-shared +\end{cppcode} + +It is vital that reads and writes to shared memory do not move outside the critical section. +But the opposite is not true! +The compiler and hardware could move as much as they want \emph{into} the critical section without causing any trouble. 
We have no problem with the following if it is somehow faster: -\begin{ccode} -idle_job->prev = &idle_job->job; /* assign to atomic pointer */ -idle_job->job.args = NULL; -idle_job->job.next = &idle_job->job; -idle_job->job.prev = &idle_job->job; -thrd_pool->func = worker; -thrd_pool->head = idle_job; -thrd_pool->state = idle; -thrd_pool->size = size; -\end{ccode} -The compiler is free to reorder instructions and the befavior of \monobox{thread\_pool\_init} would remain the same. +\begin{cppcode} +lock(); // Lock; critical section begins +deepThought.calculate(); // non-shared +sharedState.subject = "Life, the universe and everything"; +sharedState.answer = 42; +demolishEarth(vogons); // non-shared +unlock(); // Unlock; critical section ends +\end{cppcode} So, how do we tell the compiler as much? \section{Memory orderings} From 4dc5699b13fa2534d16902f221ace7d25830ac33 Mon Sep 17 00:00:00 2001 From: idoleat Date: Fri, 28 Jun 2024 03:55:44 +0800 Subject: [PATCH 27/30] Supplement statements with references to C11 and LLVM docs References to C11 standard were added when explaining properties of atomic type and operations. More information of codegen on atomic operations is added as a footnote with a link to LLVM's document as an example. --- concurrency-primer.tex | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 7457ee0..96e70af 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -321,8 +321,14 @@ \section{Enforcing law and order} This model, defined by Leslie Lamport in 1979, is called \introduce{sequential consistency}. -Notice that using atomic variables as an lvalue expression, such as \monobox{v\_ready = true} and \monobox{while(!v\_ready)}, is a convenient alternative to explicitly using \monobox{atomic\_load} or \monobox{atomic\_store}. 
-Lvalue-to-rvalue conversion (which models a memory read from an atomic location to a CPU register) strips atomicity along with other qualifiers. +Notice that using atomic variables as an lvalue expression, such as \monobox{v\_ready = true} and \monobox{while(!v\_ready)}, is a convenient alternative to explicitly using \monobox{atomic\_load} or \monobox{atomic\_store}.\punckern\footnote{% +Atomic load/store are not necessarily generated as atomic instructions. +Under a weaker consistency model, they could simply be normal load/store, +and their code generation can vary across different architectures. +Check out \href{https://llvm.org/docs/Atomics.html\#atomics-and-codegen}{LLVM's documentation} as an example to see how it is handled.} +As stated in C11 6.7.2.4 and 6.7.3, the properties associated with atomic types are meaningful only for expressions that are +lvalues. +Lvalue-to-rvalue conversion (which models a memory read from an atomic location to a CPU register) strips atomicity along with other qualifiers. \section{Atomicity} \label{atomicity} @@ -465,7 +471,7 @@ \subsection{Compare and swap} Finally, we have \introduce{compare-and-swap} (\textsc{CAS}), sometimes called \introduce{compare-and-exchange}. It allows us to conditionally exchange a value \emph{if} its previous value matches the expected one. -In \clang{} and \cplusplus{}, \textsc{CAS} resembles the following, +In \clang{} and \cplusplus{}, as noted in C11 7.17.7.4, \textsc{CAS} resembles the following, if it were executed atomically: \begin{ccode} /* A is an atomic type. 
C is the non-atomic type corresponding to A */ @@ -952,7 +958,9 @@ \subsection{Relaxed} if (job->args == NULL) { atomic_store(&thrd_pool->state, idle); } else { - printf("Hello from job %d\n", *(int *)job->args); + void *ret_value = job->func(job->args); + job->future->result = ret_value; + atomic_flag_clear(&job->future->flag); free(job->args); free(job); // could cause dangling pointer in other threads } From 11baa72323ea3ceb38bb43106829e8fcd806a4bb Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Wed, 3 Jul 2024 22:36:58 +0800 Subject: [PATCH 28/30] Add atomic instruction and simplify rmw_example.c Add the description of atomic instruction to let readers know there is a difference between using fetch and..., which is only a programming tool, and its actual execution as an atomic operation that depends on the compiler. Simplify the rmw_example code to provide more flexible examples. - Initially, all worker threads will be initialized. The main thread will ask all workers to start running. If there is no job or the job is completed, the worker will become idle. Next, the main thread will continue to add more jobs and ask the worker to start running again. Meanwhile, the main thread will also wait for the results of the work. - Use the struct `tpool_future` to record all the information required for the job. 
Co-authored-by: Chih-Wei Chien --- concurrency-primer.tex | 69 +++++------ examples/rmw_example.c | 107 +++++++++--------- images/{atomic_rmw.pdf => atomic-rmw.pdf} | Bin images/{atomic_types.pdf => atomic-types.pdf} | Bin 4 files changed, 89 insertions(+), 87 deletions(-) rename images/{atomic_rmw.pdf => atomic-rmw.pdf} (100%) rename images/{atomic_types.pdf => atomic-types.pdf} (100%) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 96e70af..d531e32 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -396,12 +396,12 @@ \section{Read-modify-write} In \secref{atomicity}, there is a need for atomicity to ensure that a group of operations is not only sequentially executed but also completes without being interrupted by operation from other threads. This establishes correct order of operations from different threads. -\includegraphics[keepaspectratio, width=0.6\linewidth]{images/atomic_rmw} +\includegraphics[keepaspectratio, width=0.6\linewidth]{images/atomic-rmw} \captionof{figure}{Exchange, Test and Set, Fetch and…, Compare and Swap can all be transformed into atomic RMW operations, ensuring that operations like t1 \to t2 \to t3 will become an atomic step.} -\label{fig:atomic_rmw} +\label{fig:atomic-rmw} Atomic loads and stores are all well and good when we do not need to consider the previous state of atomic variables, but sometimes we need to read a value, modify it, and write it back as a single atomic step. -As shown in \fig{fig:atomic_rmw}, the modification is based on the previous state that is visible for reading, and the result is then written back. +As shown in \fig{fig:atomic-rmw}, the modification is based on the previous state that is visible for reading, and the result is then written back. A complete \introduce{read-modify-write} operation is performed atomically to ensure visibility to subsequent operations. 
Furthermore, for communication between concurrent threads, a shared resource is required, as shown in \fig{fig:atomicity} @@ -411,16 +411,16 @@ \section{Read-modify-write} As discussed earlier, the process of accessing shared resources responsible for communication must also ensure both order and non-interference. To prevent the recursive protection of shared resources, -atomic operations can be introduced for the shared resources responsible for communication, as shown in \fig{fig:atomic_types}. +atomic operations can be introduced for the shared resources responsible for communication, as shown in \fig{fig:atomic-types}. There are a few common \introduce{read-modify-write} (\textsc{RMW}) operations to make theses operation become a single atomic step. In \cplusplus{}, they are represented as member functions of \cpp|std::atomic|. In \clang{}, they are freestanding functions. -\includegraphics[keepaspectratio, width=1\linewidth]{images/atomic_types} +\includegraphics[keepaspectratio, width=1\linewidth]{images/atomic-types} \captionof{figure}{Test and Set (Left) and Compare and Swap (Right) leverage their functionality of checking and their atomicity to make other RMW operations perform atomically. The red color represents atomic RMW operations, while the blue color represents RMW operations that behave atomically.} -\label{fig:atomic_types} +\label{fig:atomic-types} \subsection{Exchange} \label{exchange} @@ -441,7 +441,7 @@ \subsection{Test and set} \introduce{Test-and-set} operations are not limited to just \textsc{RMW} functions; they can also be utilized for constructing simple spinlock. In this scenario, the flag acts as a shared resource for communication between threads. -Thus, spinlock implemented with \introduce{Test-and-set} operations ensures that entire \textsc{RMW} operations on shared resources are performed atomically, as shown in \fig{fig:atomic_types}. 
+Thus, spinlock implemented with \introduce{Test-and-set} operations ensures that entire \textsc{RMW} operations on shared resources are performed atomically, as shown in \fig{fig:atomic-types}. \label{spinlock} \begin{ccode} atomic_flag af = ATOMIC_FLAG_INIT; @@ -464,7 +464,7 @@ \subsection{Fetch and…} or bitwise \textsc{AND}, \textsc{OR}, \textsc{XOR}) and return its previous value, all as part of a single atomic operation. Compare with \introduce{Exchange} \secref{exchange}, when programmers only need to make simple modification to the shared variable, -they can use \introduce{Fetch and…}. +they can use \introduce{Fetch-and…}. \subsection{Compare and swap} \label{cas} @@ -501,50 +501,57 @@ \subsection{Compare and swap} Subsequently, update the expected value with the current shared value and retry modify in a loop. This iterative process allows \textsc{CAS} to serve as a communication mechanism between threads, ensuring that entire \textsc{RMW} operations on shared resources are performed atomically. -As shown in \fig{fig:atomic_types}, compared with \introduce{Test-and-set} \secref{Testandset}, +As shown in \fig{fig:atomic-types}, compared with \introduce{Test-and-set} \secref{Testandset}, a thread that employs \textsc{CAS} can directly use the shared resource to check. It uses atomic \textsc{CAS} to ensure that Modify is atomic, coupled with a while loop to ensure that the entire \textsc{RMW} can behave atomically. +~\\ +However, atomic \textsc{RMW} operations here are merely a programming tool for programmers to achieve program logic correctness. +Its actual execution as atomic operations depends on the how compiler translate it into actual atomic instructions based on differenct hardware instruction set. +\introduce{Exchange}, \introduce{Fetch-and-Add}, \introduce{Test-and-set} and \textsc{CAS} in instruction level are different style of atomic \textsc{RMW} instructions. 
+ISA could only provide some of them, +leaving the rest to compilers to synthesize atomic \textsc{RMW} operations. +For example, In IA32/64 and IBM System/360/z architectures, +\introduce{Test-and-set} functionality is directly supported by hardware instructions. +x86 has XCHG, XADD for \introduce{Exchange} and \introduce{Fetch-and-Add} but has \introduce{Test-and-set} implemented with XCHG. +Arm, in another style, provides LL/SC (Load Linked/Store Conditional) flavor instructions for all the operations, +with \textsc{CAS} added in Armv8/v9-A. + \subsection{example} \label{rmw_example} -Following example code is a simplify implementation of thread pool to demonstrate the use of \clang{}11 atomic library. +The following example code is a simplified implementation of a thread pool, which demonstrates the use of \clang{}11 atomic library. \inputminted{c}{./examples/rmw_example.c} -%Compile the code with \monobox{gcc rmw\_example.c -o rmw\_example -Wall -Wextra -std=c11 -pthread} and execute the program. -%A thread pool has three states: idle, cancelled and running. -%It is initialized with \monobox{N\_THREADS} (default 8) of threads. -%\monobox{N\_JOBS} (default 16) of jobs are added, and the pool is then set to running. -%A job is simply echoing its job ID. -%\monobox{sleep(1)} is used to ensure that the second batch of jobs is added after the first batch is finished; otherwise, jobs may not be consumed as expected. -%Thread pool is then destroyed right after starting running. Stdout of the program is: \begin{ccode} -PI calculated with 101 terms: 3.141592653589793 +PI calculated with 100 terms: 3.141592653589793 \end{ccode} \textbf{Exchange} -In function \monobox{thread\_pool\_destroy}, \monobox{atomic\_exchange(\&thrd\_pool->state, cancelled)} reads current state and replaces it with "cancelled". A warning message is printed if the pool is destroyed when still running. -If the exchange is not performed atomically, we may initially get the state as "running". 
Subsequently, a thread could set the state to "cancelled" after finishing the last one, resulting in a false warning. +In function \monobox{thread\_pool\_destroy}, \monobox{atomic\_exchange(\&thrd\_pool->state, cancelled)} reads the current state and replaces it with ``cancelled''. +A warning message is printed if the pool is destroyed while workers are still ``running''. +If the exchange is not performed atomically, we may initially get the state as ``running''. Subsequently, a thread could set the state to ``idle'' after finishing the last one, resulting in a false warning. \textbf{Test and set} -In the example, the scenario is as follows: -First, the main thread initially acquire a lock \monobox{future->flag} and then set it true, -which is akin to creating a job and then transfer its ownership to the worker. -Subsequently, the main thread will be blocked until the worker clear the flag. -This inidcate the main thread will wail until the worker completes the job and return the ownership back to the main thread, which ensure correct cooperation. +In this example, the scenario is as follows: +First, the main thread initially acquires a lock \monobox{future->flag} and then sets it true, +which is akin to creating a job and then transferring its ownership to the worker. +Subsequently, the main thread will be blocked until the worker clears the flag. +This indicates that the main thread will wait until the worker completes the job and returns ownership back to the main thread, which ensures correct cooperation. \textbf{Fetch and…} -In the function \monobox{thread\_pool\_destroy}, \monobox{atomic\_fetch\_and} is utilized as a means to set the state to idle. +In the function \monobox{thread\_pool\_destroy}, \monobox{atomic\_fetch\_and} is utilized as a means to set the state to ``idle''. Yet, in this case, it is not necessary, as the pool needs to be reinitialized for further use regardless. 
Its return value could be further utilized, for instance, to report the previous state and perform additional actions. \textbf{Compare and swap} Once threads are created in the thread pool as workers, they will continuously search for jobs to do. -Jobs are taken from the tail of job queue. -To claim a job without it being taken by another worker halfway through, we need to atomically change the pointer to the last job. Otherwise the last job is under races. -The while loop in function \monobox{worker}, +Jobs are taken from the tail of the job queue. +To take a job without it being taken by another worker halfway through, we need to atomically change the pointer to the last job. +Otherwise, the last job is subject to a data race. +The while loop in the function \monobox{worker}, \begin{ccode} while (!atomic_compare_exchange_weak(&thrd_pool->head->prev, &job, job->prev)) { @@ -575,10 +582,6 @@ \subsection{Further improvements} Without specifying, atomic operations in \clang{}11 atomic library use \monobox{memory\_order\_seq\_cst} as default memory order. Operations post-fix with \monobox{\_explicit} accept an additional argument to specify which memory order to use. How to leverage memory orders to optimize performance will be covered later in \secref{lock-example}. -You may have noticed that there is padding after \monobox{\_Atomic(job\_t *) prev} in \monobox{struct idle\_job} in the example. -It is used for preventing \introduce{false sharing} in a cache line. -Further discussion on cache effects and false sharing is provided in \secref{false-sharing}. - \section{Atomic operations as building blocks} Atomic loads, stores, and \textsc{RMW} operations are the building blocks for every single concurrency tool. 
diff --git a/examples/rmw_example.c b/examples/rmw_example.c index 155438a..991b7c5 100644 --- a/examples/rmw_example.c +++ b/examples/rmw_example.c @@ -4,29 +4,28 @@ #include #include #include - #include #define PRECISION 100 /* upper bound in BPP sum */ - #define CACHE_LINE_SIZE 64 #define N_THREADS 64 struct tpool_future { void *result; + void *arg; atomic_flag flag; }; typedef struct job { void *(*func)(void *); - void *args; struct tpool_future *future; struct job *next, *prev; } job_t; typedef struct idle_job { _Atomic(job_t *) prev; - char padding[CACHE_LINE_SIZE - sizeof(_Atomic(job_t *))]; + char padding[CACHE_LINE_SIZE - + sizeof(_Atomic(job_t *))]; /* avoid false sharing */ job_t job; } idle_job_t; @@ -38,15 +37,15 @@ typedef struct tpool { thrd_t *pool; atomic_int state; thrd_start_t func; - // job queue is a SPMC ring buffer - idle_job_t *head; + idle_job_t *head; /* job queue is a SPMC ring buffer */ } tpool_t; -static struct tpool_future *tpool_future_create(void) +static struct tpool_future *tpool_future_create(void *arg) { struct tpool_future *future = malloc(sizeof(struct tpool_future)); if (future) { future->result = NULL; + future->arg = arg; atomic_flag_clear(&future->flag); atomic_flag_test_and_set(&future->flag); } @@ -72,28 +71,28 @@ static int worker(void *args) tpool_t *thrd_pool = (tpool_t *)args; while (1) { + /* worker is laid off */ if (atomic_load(&thrd_pool->state) == cancelled) return EXIT_SUCCESS; if (atomic_load(&thrd_pool->state) == running) { - // claim the job + /* worker takes the job */ job_t *job = atomic_load(&thrd_pool->head->prev); - while (!atomic_compare_exchange_weak(&thrd_pool->head->prev, &job, - job->prev)) { - } - if (job->args == NULL) { + /* worker checks if there is only an idle job in the job queue */ + if (job == &thrd_pool->head->job) { + /* worker says it is idle */ atomic_store(&thrd_pool->state, idle); - } else { - void *ret_value = job->func(job->args); - job->future->result = ret_value; - 
atomic_flag_clear(&job->future->flag); - free(job->args); - free(job); + thrd_yield(); + continue; } + while (!atomic_compare_exchange_weak(&thrd_pool->head->prev, &job, + job->prev)) + ; + job->future->result = (void *)job->func(job->future->arg); + atomic_flag_clear(&job->future->flag); + free(job); } else { - /* To auto run when jobs added, set status to running if job queue is not empty. - * As long as the producer is protected */ + /* worker is idle */ thrd_yield(); - continue; } }; return EXIT_SUCCESS; @@ -113,14 +112,13 @@ static bool tpool_init(tpool_t *thrd_pool, size_t size) return false; } - // May use memory pool for jobs idle_job_t *idle_job = malloc(sizeof(idle_job_t)); if (!idle_job) { printf("Failed to allocate idle job.\n"); return false; } - // idle_job will always be the first job - idle_job->job.args = NULL; + + /* idle_job will always be the first job */ idle_job->job.next = &idle_job->job; idle_job->job.prev = &idle_job->job; idle_job->prev = &idle_job->job; @@ -129,10 +127,9 @@ static bool tpool_init(tpool_t *thrd_pool, size_t size) thrd_pool->state = idle; thrd_pool->size = size; - for (size_t i = 0; i < size; i++) { + /* employer hires many workers */ + for (size_t i = 0; i < size; i++) thrd_create(thrd_pool->pool + i, worker, thrd_pool); - //TODO: error handling - } return true; } @@ -141,9 +138,10 @@ static void tpool_destroy(tpool_t *thrd_pool) { if (atomic_exchange(&thrd_pool->state, cancelled)) printf("Thread pool cancelled with jobs still running.\n"); - for (int i = 0; i < thrd_pool->size; i++) { + + for (int i = 0; i < thrd_pool->size; i++) thrd_join(thrd_pool->pool[i], NULL); - } + while (thrd_pool->head->prev != &thrd_pool->head->job) { job_t *job = thrd_pool->head->prev->prev; free(thrd_pool->head->prev); @@ -164,28 +162,25 @@ static void *bbp(void *arg) double *product = malloc(sizeof(double)); if (!product) return NULL; - + *product = 1 / pow(16, k) * sum; return (void *)product; } struct tpool_future *add_job(tpool_t 
*thrd_pool, void *(*func)(void *), - void *args) + void *arg) { - // May use memory pool for jobs job_t *job = malloc(sizeof(job_t)); if (!job) return NULL; - struct tpool_future *future = tpool_future_create(); - if (!future){ + struct tpool_future *future = tpool_future_create(arg); + if (!future) { free(job); return NULL; } - // unprotected producer - job->args = args; - job->func = bbp; + job->func = func; job->future = future; job->next = thrd_pool->head->job.next; job->prev = &thrd_pool->head->job; @@ -193,7 +188,7 @@ struct tpool_future *add_job(tpool_t *thrd_pool, void *(*func)(void *), thrd_pool->head->job.next = job; if (thrd_pool->head->prev == &thrd_pool->head->job) { thrd_pool->head->prev = job; - // trap worker at idle job + /* the previous job of the idle job is itself */ thrd_pool->head->job.prev = &thrd_pool->head->job; } return future; @@ -201,14 +196,14 @@ struct tpool_future *add_job(tpool_t *thrd_pool, void *(*func)(void *), static inline void wait_until(tpool_t *thrd_pool, int state) { - while (atomic_load(&thrd_pool->state) != state) { + while (atomic_load(&thrd_pool->state) != state) thrd_yield(); - } } int main() { - struct tpool_future *futures[PRECISION + 1]; + int bbp_args[PRECISION]; + struct tpool_future *futures[PRECISION]; double bbp_sum = 0; tpool_t thrd_pool = { .initialezed = ATOMIC_FLAG_INIT }; @@ -216,26 +211,30 @@ int main() printf("failed to init.\n"); return 0; } - for (int i = 0; i <= PRECISION; i++) { - int *id = malloc(sizeof(int)); - *id = i; - futures[i] = add_job(&thrd_pool, bbp, id); - } - // Due to simplified job queue (not protecting producer), starting the pool manually + /* employer ask workers to work */ atomic_store(&thrd_pool.state, running); + + /* employer wait ... 
until workers are idle */ wait_until(&thrd_pool, idle); - for (int i = 0; i <= PRECISION; i++) { - int *id = malloc(sizeof(int)); - *id = i; - add_job(&thrd_pool, bbp, id); + + /* employer add more job to the job queue */ + for (int i = 0; i < PRECISION; i++) { + bbp_args[i] = i; + futures[i] = add_job(&thrd_pool, bbp, &bbp_args[i]); } - for (int i = 0; i <= PRECISION; i++) { + + /* employer ask workers to work */ + atomic_store(&thrd_pool.state, running); + + /* employer wait for the result of job */ + for (int i = 0; i < PRECISION; i++) { tpool_future_wait(futures[i]); bbp_sum += *(double *)(futures[i]->result); tpool_future_destroy(futures[i]); } - atomic_store(&thrd_pool.state, running); + + /* employer destroys the job queue and lays workers off */ tpool_destroy(&thrd_pool); - printf("PI calculated with %d terms: %.15f\n", PRECISION + 1, bbp_sum); + printf("PI calculated with %d terms: %.15f\n", PRECISION, bbp_sum); return 0; } diff --git a/images/atomic_rmw.pdf b/images/atomic-rmw.pdf similarity index 100% rename from images/atomic_rmw.pdf rename to images/atomic-rmw.pdf diff --git a/images/atomic_types.pdf b/images/atomic-types.pdf similarity index 100% rename from images/atomic_types.pdf rename to images/atomic-types.pdf From b3cd30f4c9bf42374394f025056b9c06e1166d15 Mon Sep 17 00:00:00 2001 From: Wei-Hsin Yeh Date: Fri, 28 Jun 2024 14:04:28 +0800 Subject: [PATCH 29/30] Add new section "Shared Resources" and "Lock-Free" Expand the shared resources from the programmer's view in "Read Modify Write" to the cache-line perspective in section "Shared Resources". Move the original "Cache effects and false sharing" to "Shared Resources". Add an explanation of how the scalability of locks will be limited by cache coherence. Change the title of Section "Atomic operations as building blocks" to "Concurrency tools and synchronization mechanisms". Introduce a blocking mechanism with its issues, including waiting times, deadlock, and scalability. 
Give an example of how synchronization without using a lock can also lead to a livelock. Introduce the three types of progress, which are wait-free, lock-free, and obstruction-free. - Add a figure containing three tables showing the progress of every thread and the progress of the overall system. Give a SPMC problem to explain how to achieve fully lock-free by using algorithms and data structure designs with appropriate synchronization mechanisms and concurrency tools. - Add two flowcharts: one for "lock-based" solution 1 and another for "lock-based and lock-free" solution 2 and a diagram illustrating different data structures to show different data structures. --- concurrency-primer.tex | 359 ++++++++++++++++++++++++++++++-------- images/false-sharing.pdf | Bin 0 -> 9280 bytes images/progress-type.pdf | Bin 0 -> 10242 bytes images/spinlock.pdf | Bin 0 -> 9908 bytes images/spmc-solution1.pdf | Bin 0 -> 11675 bytes images/spmc-solution2.pdf | Bin 0 -> 9623 bytes images/spmc-solution3.pdf | Bin 0 -> 9657 bytes 7 files changed, 290 insertions(+), 69 deletions(-) create mode 100644 images/false-sharing.pdf create mode 100644 images/progress-type.pdf create mode 100644 images/spinlock.pdf create mode 100644 images/spmc-solution1.pdf create mode 100644 images/spmc-solution2.pdf create mode 100644 images/spmc-solution3.pdf diff --git a/concurrency-primer.tex b/concurrency-primer.tex index d531e32..292a72c 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -221,7 +221,7 @@ \section{Background} A stall, or suspension of forward progress, occurs when an instruction awaits the outcome of a preceding one in the pipeline until the necessary result becomes available.} or to optimize data locality.\punckern\footnote{% \textsc{RAM} accesses data not byte by byte, but in larger units known as \introduce{cache lines}. Grouping frequently used variables on the same cache line means they are processed together, -significantly boosting performance. 
However, as discussed in \secref{false-sharing}, +significantly boosting performance. However, as discussed in \secref{shared-resources}, this strategy can lead to complications when cache lines are shared across cores.} Variables may be allocated to the same memory location if their usage does not overlap. @@ -506,7 +506,6 @@ \subsection{Compare and swap} It uses atomic \textsc{CAS} to ensure that Modify is atomic, coupled with a while loop to ensure that the entire \textsc{RMW} can behave atomically. -~\\ However, atomic \textsc{RMW} operations here are merely a programming tool for programmers to achieve program logic correctness. Its actual execution as atomic operations depends on the how compiler translate it into actual atomic instructions based on differenct hardware instruction set. \introduce{Exchange}, \introduce{Fetch-and-Add}, \introduce{Test-and-set} and \textsc{CAS} in instruction level are different style of atomic \textsc{RMW} instructions. @@ -582,14 +581,78 @@ \subsection{Further improvements} Without specifying, atomic operations in \clang{}11 atomic library use \monobox{memory\_order\_seq\_cst} as default memory order. Operations post-fix with \monobox{\_explicit} accept an additional argument to specify which memory order to use. How to leverage memory orders to optimize performance will be covered later in \secref{lock-example}. -\section{Atomic operations as building blocks} +\section{Shared Resources} +\label{shared-resources} +From \secref{rmw}, we have understood that there are two types of shared resources that need to be considered. +The first type is shared resources that concurrent threads will access in order to collaborate to achieve a goal. +The second type is shared resources that serve as a communication channel for concurrent threads, +ensuring correct access to shared resources. +However, all of these considerations stem from a programming perspective, +where we only distinguish between shared resources and private resources. 
+ +Given all the complexities to consider, modern hardware adds another layer to the puzzle, +as depicted in \fig{fig:dunnington}. +Remember, memory moves between the main \textsc{RAM} and the \textsc{CPU} in segments known as cache lines. +These cache lines also represent the smallest unit of data transferred between cores and caches. +When one core writes a value and another reads it, +the entire cache line containing that value must be transferred from the first core's cache(s) to the second core's cache(s), +ensuring a coherent ``view'' of memory across cores. This dynamic can significantly affect performance. + +This slowdown is even more insidious when it occurs between unrelated variables that happen to be placed on the same shared resource, +which is the cache line, as shown in \fig{fig:false-sharing}. +When designing concurrent data structures or algorithms, +this \introduce{false sharing} must be taken into account. +One way to avoid it is to pad atomic variables with a cache line of private data, +but this is obviously a large space-time trade-off. + +\includegraphics[keepaspectratio, width=0.6\linewidth]{images/false-sharing} +\captionof{figure}{Processor 1 and Processor 2 operate independently on variables A and B. +Simultaneously, they read the cache line containing these two variables. +In the next time step, each processor modifies A and B in their private L1 cache separately. +Subsequently, both processors write their modified cache line to the shared L2 cache. +At this moment, the expansion of the scope of shared resources to encompass cache lines highlights the importance of considering cache coherence issues.} +\label{fig:false-sharing} + +Not only shared resources, +but we also need to consider shared resources that serve as a communication channel, e.g. spinlock (see \secref{spinlock}). +Processors using locks as a communication channel also need to transfer the cache line. 
+When a processor broadcasts the release of a lock, +multiple processors on different chips attempt to acquire the lock simultaneously. +To ensure a consistent state of the lock across all private L1 cache lines, +which is a part of cache coherence, +the cache line containing the lock will be continually transferred among the caches of those cores. +Unless the critical sections are considerably lengthy, +the time spent managing this cache line movement could exceed the time spent within the critical sections themselves,\punckern\footnote{% +This situation underlines how some systems may experience a cache miss that is substantially more costly than an atomic \textsc{RMW} operation, +as discussed in Paul~E.\ McKenney's +\href{https://www.youtube.com/watch?v=74QjNwYAJ7M}{talk from CppCon~2017} +for a deeper exploration.} +despite the algorithm's non-blocking nature. + +With these high communication costs, there may be only one processor that succeeds in acquiring it again in the case of mutex lock or spinlock, as shown in \fig{fig:spinlock}. +Then the other processors that have not successfully acquired the lock will continue to wait, +resulting in little practical benefit (only one processor gains the lock) and significant communication overhead. +This disparity severely limits the scalability of the spin lock. + +\includegraphics[keepaspectratio, width=0.9\linewidth]{images/spinlock} +\captionof{figure}{Three processors use a lock as a communication channel to ensure that the access operations to the shared L2 cache will be correct. +Processors 2 and 3 are trying to acquire a lock that is held by processor 1. +Therefore, when processor 1 unlocks, +the state of the lock needs to be updated in the other processors' private L1 caches.} +\label{fig:spinlock} +\section{Concurrency tools and synchronization mechanisms} +\label{concurrency-tool} Atomic loads, stores, and \textsc{RMW} operations are the building blocks for every single concurrency tool.
It is useful to split those tools into two camps: \introduce{blocking} and \introduce{lockless}. -Blocking synchronization methods are generally easier to understand, -but they can cause threads to pause for unpredictable durations. +As mentioned in \secref{rmw}, multiple threads can use these blocking tools to communicate with others. +Furthermore, these blocking tools can even assist in synchronization between threads. +The blocking mechanism is quite simple, +because all threads need to do is block others in order to make their own progress. +However, this simplicity can also cause threads to pause for unpredictable durations and then influence the progress of the overall system. + Take a mutex as an example: it requires threads to access shared data sequentially. If a thread locks the mutex and another attempts to lock it too, @@ -598,30 +661,227 @@ \section{Atomic operations as building blocks} Additionally, blocking mechanisms are prone to \introduce{deadlock} and \introduce{livelock}, issues that lead to the system becoming immobilized as threads perpetually wait on each other. -In contrast, lockless synchronization methods ensure that the program is always making forward progress. -These are \introduce{non-blocking} since no thread can cause another to wait indefinitely. -Consider a program that streams audio, -or an embedded system where a sensor triggers an interrupt service routine (\textsc{ISR}) when new data arrives. -We want lock-free algorithms and data structures in these situations, -since blocking could break them. -(In the first case, the user's audio will begin to stutter if sound data is not provided at the bitrate it is consumed. -In the second, subsequent sensor inputs could be missed if the \textsc{isr} does not complete as quickly as possible.) - -% FIXME: remove this hack -% LaTeX provides 9 symbols when using symbol option, therefore it produces an error if we count higher. 
-\setcounter{footnote}{0} -Lockless algorithms are not inherently superior or quicker than blocking ones; -they serve different purposes with their own design philosophies. -Additionally, the mere use of atomic operations does not render algorithms lock-free. -For example, basic spinlock is still considered a blocking algorithm even though it eschews \textsc{OS}-specific syscalls for making the blocked thread sleep. -Putting a blocked thread to sleep is often an optimization, -allowing the operating system's scheduler to allocate \textsc{CPU} resources to active threads until the blocked one is revived. -Some concurrency libraries even introduce hybrid locks that combine brief spinning with sleeping to balance \textsc{CPU} usage and context-switching overheads. - -Both blocking and lockless approaches have their place in software development. -When performance is a key consideration, it is crucial to profile your application. -The performance impact varies with numerous factors, such as thread count and \textsc{CPU} architecture specifics. -Balancing complexity and performance is essential in concurrency, a domain fraught with challenges. +If the first thread acquires a mutex first, +then the second thread locks another mutex and subsequently attempts to lock the mutex held by the first thread. +At the same time, the first thread also tries to lock the mutex held by the second thread. +Then the deadlock occurs. +Therefore, we can see that deadlock occurs when different threads acquire locks in incompatible orders, +leading to system immobilization as threads perpetually wait on each other. + +Additionally, in \secref{shared-resources}, +we can see another problem with the lock: its scalability is limited. + +After understanding the issue that blocking mechanisms are prone to, +we try to achieve synchronization between threads without lock. 
+Consider the program below, and first suppose that it is executed by only a single thread: + +\begin{cppcode} +while (x == 0) + x = 1 - x; +\end{cppcode} + +When executed by a single thread, these operations complete within a finite time. +However, with two threads executing concurrently, +if one thread executes \cpp|x = 1 - x| and the other thread subsequently executes \cpp|x = 1 - x| as well, +then the value of \cpp|x| will always be 0, which will lead to a livelock. +Therefore, even without any locks in concurrent threads, +we still cannot guarantee that the overall system can make progress toward achieving the programmer's goals. + +Consequently, we should not focus on comparing which communication tools or synchronization mechanisms are better, +but rather on exploring how to effectively use these tools in a given scenario to facilitate smooth communication between threads and achieve the programmer's goals. + +\section{Lock free} +In \secref{concurrency-tool}, we explored different mechanisms based on the characteristics of concurrency tools, +as described in \secref{atomicity} and \secref{rmw}. +In this section, we need to explore which strategies can help programmers to design a concurrent program +that allows concurrent threads to collectively ensure progress in the overall system while also improving scalability, +which is the initial goal of designing a concurrent program. +First of all, we must figure out the scope of our problem. +Understanding the relationship between the progress of each thread and the progress of the entire system is necessary. + +\subsection{Type of progress} +When many concurrent threads collaborate and each thread is divided into many operations, three types of progress can be distinguished: + +\textbf{Wait-Free} Every operation in every thread will be completed within a limited time. +This also implies that each operation contributes to the overall progress of the system.
+ +\textbf{Lock-Free} At any given moment, among all operations in every thread, +at least one operation contributes to the overall progress of the system. +However, it does not guarantee that starvation will not occur. + +\textbf{Obstruction-Free} At any given time, if there is only a single thread operating without interference from other threads, +its instructions can be completed within a finite time. However, when threads are working concurrently, +it does not guarantee progress. + +Therefore, we can understand their three relationships as follows: +obstruction-free includes lock-free and lock-free includes wait-free. +Achieving wait-free is the most optimal approach, +allowing each thread to make progress without being blocked by other threads. + +\includegraphics[keepaspectratio, width=1 \linewidth]{images/progress-type} +\captionof{figure}{In a wait-free system, each thread is guaranteed to make progress at every moment because no thread can block others. +This ensures that the overall system can always make progress. +In a lock-free system, at Time 1, Thread 1 may cause other threads to wait while it performs its operation. +However, even if Thread 1 suspends at Time 2, it does not subsequently block other threads. +This allows Thread 2 to make progress at Time 3, ensuring that the overall system continues to make progress even if one thread is suspended. +In an obstruction-free system, when Thread 1 is suspended at Time 2, +it causes other threads to be blocked as a result. This means that by Time 3, +Thread 2 and Thread 3 are still waiting, preventing the system from making progress thereafter. +Therefore, obstruction-free systems may halt progress if one thread is suspended, +leading to the potential blocking of other threads and even stalling the system.} +\label{fig:progress-type} + +The main goal is that the whole system, +which contains all concurrent threads, +is always making forward progress. 
+To achieve this goal, we rely on concurrency tools, +including atomic operations and the operations that perform atomically, as described in \secref{rmw}. +Additionally, we carefully select a synchronization mechanism, as described in \secref{concurrency-tool}, +which may involve utilizing shared resources for communication (e.g., spinlock), as described in \secref{shared-resources}. +Furthermore, we design our program with appropriate data structures and algorithms. +Therefore, lock-free doesn't mean we cannot use any lock; +we just need to ensure that the blocking mechanism will not limit the scalability and that the system can avoid the problems described in \secref{concurrency-tool} (e.g., long waiting times, deadlock). + +Next, we take the single producer and multiple consumers problem as an example to demonstrate how to achieve fully lock-free programming by improving some implementations step by step.\punckern\footnote{% +The first three solutions, which are \secref{spmc-solution1}, \secref{spmc-solution2}, and \secref{spmc-solution3}, are referenced in Herb Sutter's +\href{https://youtu.be/c1gO9aB9nbs?si=7qJs-0qZAVqLHr1P}{talk from CppCon~2014.}} +In this problem, one producer generates tasks and adds them to a job queue, +and multiple consumers take tasks from the job queue and execute them. +\subsection{SPMC solution - lock-based} +\label{spmc-solution1} +First, we introduce the scenario of lock-based algorithms. +At any time, there is only one consumer that can get the lock to access the job queue. +This is because in this scenario, the lock is a mutex lock, also known as a mutual exclusion lock. +Until that consumer releases the lock, the other consumers are blocked when attempting to access the job queue. + +The following text explains the meaning of each state in \fig{fig:spmc-solution1}.
+ +\textbf{state 1} : The producer is adding tasks to the job queue while multiple consumers wait for tasks to become available and are ready to take on any job that appears in the job queue. + +\textbf{state 2} $\to$ \textbf{state 3} : After the producer adds a task to the job queue, +the producer releases the mutex lock, and then wakes the consumers up. +Those consumers had previously tried to acquire the lock of the job queue in order to take a job. + +\textbf{state 3} $\to$ \textbf{state 4} : Consumer 1 acquires the mutex lock for the job queue, +retrieves a task from it, and then releases the mutex lock. + +\textbf{state 5} : Next, other consumers attempt to acquire the mutex lock for the job queue. +However, after they acquire the lock, they find no tasks in the queue. +This is because the producer has not added more tasks to the job queue. + +\textbf{state 6} : Consequently, the consumers wait on a condition variable. +During this time, the consumers are not busy waiting but rather waiting for the producer to wake them up. +This is because the mechanism is an advanced form of mutex lock. + +\includegraphics[keepaspectratio, width=0.6\linewidth]{images/spmc-solution1} +\captionof{figure}{The interaction between the producer and consumer in SPMC Solution 1, +including their state transitions.} +\label{fig:spmc-solution1} + +This implementation is not lock-free for two reasons. +First, if a producer suspends, +it causes consumers to have no job available, +leading them to block and thus halting progress in the entire system, +which is obstruction-free, as shown in \fig{fig:progress-type}. +Secondly, consumers concurrently need to access shared resources, which is the job. +Then, one consumer acquires the lock of the job queue but suddenly gets suspended before completing without unlocking, +causing other consumers to be blocked.
+Meanwhile, the producer still keeps adding jobs, but the system fails to make any progress, +which is obstruction-free, as shown in \fig{fig:progress-type}. +Therefore, neither the former nor the latter implementation approach is lock-free. + +\subsection{SPMC solution - lock-based and lock-free} +\label{spmc-solution2} +As described in \secref{spmc-solution1}, there is a problem when the producer suspends; +the whole system cannot make any progress. +Additionally, consumers contend for the lock of the job queue to access the job; +however, after they acquire the lock, they may still need to wait when the queue is empty. +To solve this issue, this section presents an algorithm that is both lock-based and lock-free. + +The following text explains the meaning of each state in \fig{fig:spmc-solution2}. + +\textbf{state 0} : The producer prepares all the jobs in advance. + +\textbf{state 1} : Consumer 1 acquires the lock on the job queue, takes a job, and releases the lock. + +\textbf{state 2} : After consumer 2 acquires the lock, it is certain to find that there are still jobs in the queue. + +Through this approach, once a consumer obtains the lock on the job queue, +a job is guaranteed to be available unless all jobs have been taken by other consumers. +Thus, there is no need to wait due to a lack of jobs; +the only wait is for acquiring the lock to access the job queue. + +\includegraphics[keepaspectratio, width=0.7\linewidth]{images/spmc-solution2} +\captionof{figure}{The interaction between the producer and consumer in Solution 2, +including their state transitions.} +\label{fig:spmc-solution2} + +This implementation is referred to as both lock-based and lock-free. +The algorithm is designed such that the producer adds all jobs to the job queue before multiple consumers begin taking them. +This design ensures that if the producer suspends or adds jobs slowly, +consumers will not be blocked due to the lack of a job.
+Consumers simply conclude that they have finished all the jobs that the producer added. +Therefore, this implementation qualifies as lock-free, as shown in \fig{fig:progress-type}. +The reason that the implementation of getting a job is lock-based, not lock-free, +is the same as the second reason described in \secref{spmc-solution1}. + +\subsection{SPMC solution - fully lock-free} +\label{spmc-solution3} +As described in \secref{shared-resources}, +we can understand that communications between processors across a chip are through cache lines, +which incurs high costs. Additionally, using locks further decreases overall performance and limits scalability. +However, when locks are necessary for concurrent threads to communicate, +reducing the shared resources used for communication (e.g., spinlock, mutex lock) and their granularity is crucial. +Therefore, to achieve fully lock-free programming, we change the data structure to reduce the granularity of locks. + +\includegraphics[keepaspectratio, width=1\linewidth]{images/spmc-solution3} +\captionof{figure}{The left side shows that the lock protects the entire job queue to ensure exclusive access to its head for multiple threads. +The right side illustrates that each thread has its own slot for accessing jobs, +not only achieving exclusivity through the data structure but also eliminating the need for shared resources for communication.} +\label{fig:spmc-solution3} + +Providing each consumer with their own unique slot to access jobs addresses the problem at its root, +directly avoiding competition. +By doing so, consumers no longer rely on a shared resource for communication. +Consequently, other consumers will not be blocked by a suspended consumer holding a lock. +This approach ensures that the system maintains its progress, +as each consumer operates independently within their own slot, +which is lock-free, as shown in \fig{fig:progress-type}.
+ +\subsection{SPMC solution - fully lock-free with CAS} +\label{SPMC-solution4} +In addition to reducing granularity, +there is another way to avoid the situation in which one consumer acquires the lock on the job queue but suddenly gets suspended, +causing other consumers to be blocked as described in \secref{spmc-solution2}. +As described in \secref{cas}, we can use \textsc{CAS} with a loop to ensure that the write operation achieves semantic atomicity. + +In \secref{spmc-solution2}, +which uses a shared resource (e.g., advanced form of mutex lock) for blocking synchronization, +the first thread holding the lock causes the other threads to wait until the first thread releases the lock. +In contrast, as described in \secref{cas}, \textsc{CAS} allows threads that initially failed to acquire the lock to continue to execute Read and Modify. +Therefore, we can conclude that if one thread is blocked, +it indicates that another thread is making progress, +which is lock-free, as shown in \fig{fig:progress-type}. + +As described in \secref{spmc-solution2}, a blocking mechanism uses a mutex lock; +we can see that only one thread is active when it accesses the job queue. +Although \textsc{CAS} will continue to execute Read and Modify, +it doesn't result in an increase in overall progress. +This is because the operations will be useless when atomic \textsc{CAS} fails. +Therefore, we can understand that lock-free algorithms are not inherently faster than blocking ones. +The reason for using lock-free is to ensure that if one thread is blocked, +it doesn't cause other threads to be blocked, +thereby ensuring that the overall system must make progress over a long period of time. + +\subsection{Conclusion about lock-free} +In conclusion, +we can see that both blocking and lockless approaches have their place in software development. +They serve different purposes with their own design philosophies.
+When performance is a key consideration, it is crucial to profile your application, +take advantage of every concurrency tool or mechanism, and accompany them with appropriate data structures and algorithms. +The performance impact varies with numerous factors, such as thread count and CPU architecture specifics. +Balancing complexity and performance is essential in concurrency, +a domain fraught with challenges. \section{Sequential consistency on weakly-ordered hardware} @@ -885,7 +1145,7 @@ \subsection{Acquire and release} On \textsc{Arm} and other weakly-ordered architectures, this enables us to eliminate one of the memory barriers in each operation, such that - \begin{cppcode} +\begin{cppcode} int acquireFoo() { return foo.load(memory_order_acquire); @@ -1124,45 +1384,6 @@ \section{Hardware convergence} \textsc{Arm}v8 processors offer dedicated load-acquire and store-release instructions: \keyword{lda} and \keyword{stl}. Hopefully, future \textsc{CPU} architectures will follow suit. -\section{Cache effects and false sharing} -\label{false-sharing} - -Given all the complexities to consider, modern hardware adds another layer to the puzzle. -Remember, memory moves between main \textsc{RAM} and the \textsc{CPU} in segments known as cache lines. -These lines also represent the smallest unit of data transferred between cores and their caches. -When one core writes a value and another reads it, -the entire cache line containing that value must be transferred from the first core's cache(s) to the second, -ensuring a coherent ``view'' of memory across cores. - -This dynamic can significantly affect performance. -Take a readers-writer lock, for example, -which prevents data races by allowing either a single writer or multiple readers access to shared data but not simultaneously. 
-At its most basic, this concept can be summarized as follows: -\begin{cppcode} -struct RWLock { - int readers; - bool hasWriter; // Zero or one writers -}; -\end{cppcode} -Writers must wait until the \cc|readers| count drops to zero, -while readers can acquire the lock through an atomic \textsc{RMW} operation if \cc|hasWriter| is \cpp|false|. - -At first glance, this approach might seem significantly more efficient than exclusive locking mechanisms (e.g., mutexes or spinlocks) in scenarios where shared data is read more frequently than written. -However, this perspective overlooks the impact of cache coherence. -If multiple readers on different cores attempt to acquire the lock simultaneously, -the cache line containing the lock will constantly be transferred among the caches of those cores. -Unless the critical sections are considerably lengthy, -the time spent managing this cache line movement could exceed the time spent within the critical sections themselves,\punckern\footnote{% -This situation underlines how some systems may experience a cache miss that is substantially more costly than an atomic \textsc{RMW} operation, -as discussed in Paul~E.\ McKenney's -\href{https://www.youtube.com/watch?v=74QjNwYAJ7M}{talk from CppCon~2017} -for a deeper exploration.} -despite the algorithm's non-blocking nature. - -This slowdown is even more insidious when it occurs between unrelated variables that happen to be placed on the same cache line. -When designing concurrent data structures or algorithms, -this \introduce{false sharing} must be taken into account. -One way to avoid it is to pad atomic variables with a cache line of unshared data, but this is obviously a large space-time tradeoff. \section{If concurrency is the question, \texttt{volatile} is not the answer.} % Todo: Add ongoing work from JF's CppCon 2019 talk? 
diff --git a/images/false-sharing.pdf b/images/false-sharing.pdf new file mode 100644 index 0000000000000000000000000000000000000000..467ab278685dbab87cc98848fd2f0ceb935ca0c4 GIT binary patch literal 9280 zcma)?bzGEN*RWMUkWwU+9#SNS8fHjokQ9`z0R|XisG&=x8vzMPK}w{%K|&ho?h>Ry z5JC86@OU1dbKdX$z4vD3X05$;t+n_1W3J7lDka0g&B>3;RIy!nhRX*40&I;fafO8e zT#7J+Inn~ak5Z}O0ssIm8MrkP=74%zLy<5kn2D_^3|CYX*AeLegWBM_CRb>5A*clI zG@NMeO?cg}B`d6PyVm-gh;|7-%6HNCZ99eJ0X;U(!HJu6B`K4LNFouH+@_NB*mAyR zq^a!pNgyXhr~V9qA-3iGTs?ANF6L~{#^?mQvEk$Vr^Rg{+0>cU<`siF)vb79NdM+e z(M-|d!r5&^9VOV6KKjlYQXyS(u9V6E0kR@J&LAe!b(d-*Etb#n2MwW}yEr7Wd8-dW6@_G zMk?a^ouV>w*(9{&-M1T`I!C3)tIf8X7~OWv{!(IHSdG;YI|;DY#qPi!QIPZG47)AR z+LT#kBXgl*?7QJwL9SEa6L=IoDtbb3$+!QZXZ@TnQ|9u6uOc{j7J7^IoJ~aIUC% zQq~A~ftor&w=9Mu!85p24JIR>HANMLLxJ%!YqN|VLlL(}+KG;sa4_L8Zmhe3s!}Ap z>!!X29-^W@GC`_fJ!d|fq*i2`BR4{U4C}E^2siHe8Me^!P&vU2aRJe{pN0)3;z@l^ zGE=QIqa)soexy!9n7Nd%^A0~8=be3m6;78`!%#NG%I`L9hxs!G6FYKATb2Af<5Yc2 z)Q1*6`j<7^>FZ2i-{T3Y#@F5T8DJ&|jFpoV5Y>v7uY5Ejy~h z^UB*#6@2z|Dq{kBCw-Am%VhbPD(RM25La2#!#$Zyj_7T34ADEzJYcBQsd)I^fa23T zthsLC8WPV)Vwh!Ooo7#bOOlckx*jfe#Zp~oURB;Po(bj*#G)3r5T`0`j*hLhT>DLXVld!r zxIKk$F(G4G5)+E^yp@Tu-vjt`a!x#aN=W%<2EHZxN6$y;hEjw!`8E#iftAY1A=OxN zLN3&1#DspwEGc<~{ff7hxbj?Ie+KiKbJN5#D6f=2FJB2?@SI}_CA5g)!Vso^-Fk@xRpbCq7CU6HctNK|S82n(YXsD}#*L4vE*V<{@(*h>4j=~*0Q#M4g)X&xUU9R zg*w1ceR2P8%cTS}g+nE5T>-i%6%fD&1_F5bfB^j~Yf03&D04>uI$!=uj2=ZOIRC7p%fOKZ zU#6AJIN**P4c#M|1rUzC34Q_&RQtwFIvJz4c6fLs(^PBG5ccMdn?ktqK{lA8^cXiG zar^Sg1V)s!!M7nG?YCgcr2(SciXsn8=TiCO@8&0Gm7yA%&orGH+`rB3{+xTnLf0li zffLnS-|iUaX?H#-c4sM%-j6M%%2+#{d)HhOXTOqI!bOu)yZC!(fSB>D=F755cCGSS zo{Ls2^O6fjy6&#`r;hXok|vRhXM|gnYW~!P&%fc*T}WByXmS`-TFw^r>XsB3dkkj0 zadX6}p=bg1KTseW(5~<^>^s1b|NIF%&eP{2dNp6k(4;Qvj`O`mxva4^%~G>ijpoRS z@0!6I&sa-&A|61t*M)59hfezsQ)&@K%o!V@`2?4`t&RlT#O#>#As4 zOetp;6PD^yFh?pm6Ib-g97vCgJXBDTM$lTrz&kZLjy6_SZ6-vv{}8jp#4fv$*18uw zB9VXKU7fqCTGm0!Qe-t@2kaiP80eLDY1&sFNVJG$;1Zw_%RbGlCJ?5a!PaaMWzbln zTYkospzqY}-KDV(Fe){lmOz+SCQ}AV1Qt1caqru}HAyFeipx4jOGzBhAcfED 
zsu?`9)W`EjCWFZUPX0{#6$T?)WnX!8c)+m`=dII#ZV8q++3|NhnRYDJEZHhk^;Tai zsf8+SW=Gv)IjIho`M2jiha1;ENv7`9AY&=3inqV1h@F2~7D&w(C3!eL_ho0yfGg7C z*63)iFG6Ag_YAke@%5Wd*@R&6wl-$p;GEua)prWP+EH#FwX+Vlx5uXl{0-pocq=bw zf`=6_FG>OX@#a|>#vHZA1yb#iI>8lHeX1WU38Sv{aj$E+>Yr+uguV657ahA#EVXQO zO778_;smq zm0PgCyHZ9g{gZfp`P!kvI*VSpCu@a8b>*D5+BrL)6|~10Ta^@OJv;`EAl^U|>Sf@Nss2(h1ev+`6qD;WmA$@Vo2~Gl9%D?FYnURilW)m=j$NdVwgpNltFl9_N)G z#s&d*zf$aM;#!&ar_&{KAAKN<36f$`z7ZUsu<~k&v4|wicQ0G*wYvD~GmVue%TJ3a zc3ZL_UUtuKg4HLh$aZ;ScUteN3pPYNJRoos*kUE@m!6CbPIA`!0?Dsii)wYkSl`#4 z8n04eY@6 z?w`5g*iaByHI49Xv)43UDC!x~-7*b%(;J*BNWWteUin?-Ac#{g!{Ye6{0-?Rcb#k{ zlU&)D!z0So;_+U8#bwNs{~Enua{%&6@|Zq!zA-#XwuLcWf1i+&9yg!^(|fIgE7jHd z8^go8GOi}1{8@5`*ALn56oKuDazsuW(f;i1u~AyMIJ{`EWvWg#vdMMRKY@U2`zL2GPOvon*f~m zOmAaV0VMcY0Yi^Dvc}YO^BVZF_*5y_l{n<5qe|(D>8k0RBz4putEWGlh@FacW?5BT zP~B7Ad|S@w)LeUAHkKik!Ctmrb}90^GFy&F&a$$H%BQqrNVnYN=kt0~3g3JxL7bm* z*cwIaGS?K|cJT$2DP*2k_MOb=HroEgu$ptT@7Q;H;yru-V^a{=nJK#n6}VS^`_gPR zq*S||*Onr|%V)J;a`I=BUa|F5`C!!vZ9-Xr*?{^acgmP?uQK#rP|C`C?|kj&6rk~$ zM^-@(^~=FF!t(cEoL`!Ucbk_5=(X-`K47m{KgMEKdGkT&(RQBt%!K;PBL5Sa_xVNL zvr}DTPPdJ`*s$nVpzt1-|e2fR00K?IR?fcA^+QIb>7Nr*jzOJ1Xll z>h#K?ZfxGG=&8Z}OmAdky1JUVR*kX8fLfH%L;)qF zGu@+_S`<{96l6Los3`7UAFJ;MN>aWt?5^A zf7Mia`!gYAiC!hIidZDtfv7uCv6sp5ZpYJ{K_(*QJ6Np2FEMQM!^;#hSQOV3BPhz* zh`v({>)~}q4@0X$ZYZa>?SOoB1uw~JiyM0)l(}?D^9PzHVYXUE@!x!+UA1lTWHb#i zyc*LKPENrO?A3BwH&)I{ytBQJy(6sMp7;3CDEOL%;NJ)>yoMu@8YtSEWu-5&yt5pe zW|yijl1BA@cT#7*m&x9|kXc&W=|pmng*vB#!UMY%o1xm}RVQOg*b~VviSS8=QP=fa z`Ea)ag;E?oQAi3=9m5M3%w@d|GkUfbeClYYBH{TwVj3^U6gf`8S2svk%rH&FZ%Fg7 z;^>tu^H{t|_r;-oH;Zg83-mrpb{4mqE6dRy*2p0CIoMdoK2C9$K2!4$sJ+}%p$#sa zJ$mHj603#ToA`dV?s_H#TN; z0LcAMxZn29HSkSJ<~ZRulO9c%eyt?6-Ai@7>yvWgn$Rp2xj@A0ugLwWs(<{8&_Y>k z&9L1)!Qoro^TAw+r?evbZFuERh4`vnXIK!S)n1=ooGqI2r=*J*&Z<}14^giQfvnH+ zB4qEmi)A+XkIM8aJxKw6+DcsJ(`>6#loMahI%G-W%FC7|UdWHrG?@ zdVrQkGi?6)i_MD4JrCOI)bxJpK?>Pj;&^(ULi3yT)5Gp{ z)F2#{>{L`A``y*7$sf^w&rb4QkH64=$G`hAG=;6*RANxtF7$D7dQgavy4*e9*~IQU 
zbSk|r>ul{G@0WDyf0&``y|ZgKph_MfLFWDcHZ~~Z*q9qU_(*r4qPPbl???XYkGb()HC-7Yp~3zN72QL_@ct_xdrYt zx-nKpIfOdzvTGMcTh(-GsczKQnTmJHxy81tdXqG_Byd_zgS{0lpQh-%eMm+Dn0dP% zF;0v5|?N-QY^r?8Yc++Rh(|R(SqEw#q)g;ZM*cTd=J5#k@ zpCYz(U8FM;fS=#a&LZX9{CYwRyc`!d1Vk^sw5k|3rK(GCbf?H!jw;ecRI@wL%db#+ zRU||`T=EW_RK~tr_YCJ1T|&IuiXP*dKQ)x3PCsYO9gNQ~zU_|Pg}A@Wa5H*k7c-g> zqd2Cx?b_Ko>ARN>RgCJ?6~-_eg9N!Lk~#C0MmPJ0=C-%oO0%+7{mo@; z@3(sr=Zx&QrvI24mN~Axm^j=QosqSz(r7MPmSlYOQZSTe;m!l%yAUzR0#fh;S-)rV zA~FaYlPGCPu1pT17!HOqej4mDzPo-z%-Qd&*M|UI*EC-}siRR^5+)4QCW-nMs!XXS zK*&)s=?*6XO!cSSebX+*t7l2(z89rR4_ z3|k(uPP3j_esWlMfcw{fY@&xs8;gywQ$8lpFpUriqowDO^eT^_;Tnr8%#J4OHyVaGO|BQmKWmStY0>ARSV|`JxLT-ymnyV?=#$P@9@%6$C93Dzd`VdO zM)#;Qh65Ajs1W0=XHsd}CABvD&S?xWrbCLH#HHVn9@z|Ol=zY#d~GDRm%CgA11PcD z&+mWL(tYKXYywJDQmEc-v2JV=>;X+b*N#R_tAu ztHPoZm42*J-c@M3fh$v32}zt!-o(HiULo+@Zqy8~CRNBs{K1t5<8 z`t-Z$pe}(iO}H*A5`ShVZ0+i`*=Fn77EkTXI%6u9C2<9?j4*D8M8_!q11 zsXvD&{k{hUNe1w2pU9uNJ*1M zKb8Nr!*$n(e*dy3RHsvIM4!!#xCinOwDwQs}egx*Z1qm zc6{>c&PwU`J0f9UzgQ)=@6s|esLnl(MN-WrbBgKFW48p{{V}0n`V!b;n#ue&wYRy- z3J@u3{g|Hg9gW$-y=(Q&F176^U zRxnDeylbj=u?aphFubhNyb(#UJRbF!jGnk!hUZ0A)Zz@89~sw1hX-CV1MzpC7VZc8 z$^eDQr)oV-60G)Hc3y-Y6(_i7ej^`MlU>5}?$~~`&007svY-1ghbVcjwI@WW#4O@` zUz2EV{YmxD>OAXuArpZuW=xgjVsF<=%w@$}k1rV!*aHJ$W@ZLbK`ZrN}L!~Kahz!xt+Mg%TmVd=E)ITECf zFF*)V2Nx1<%IbEMX3l=*A&aosLy8DzwZss9pM+ALCEf_M*&7C)Gu{wW4p;-8?*-$W zy}hX-wXn~O?IDaoSXf%@OKxXGFU;JM>1%YEZCYvQD@zkM!63g^jCpn}IMIkd5xRUV zXjIg_+Y;Vc?33xbs)8K zxmrfVI&FEguZ^dPl8BY?^=Tg?*feaS@L;;Qa@%a8b`^8_eM?*)%(~~+#89)XH^A41 z)3=ZH@>=(|M>nmfSjv>Tit{R}l(ts|x{GPz4QOASIW9JWtE+B#8~F#w`SDv6BVM(Q zof_FR_!baIHel>%FPolJL&J$T-xyWk=d%|(NIx&w3Lxzvo(#FhuHpv+!?k(XM;Fd0 zMmS5#bO_=mpuyeaBdxQcvZZzT(0w_rr#2>`j9Mejn$zhztC`iN<&Wt57&W(kz_4LK11AmMu;ZWn(OMlk~+G8>o7 zyIB^JF6Fxm2OeV1ylpwcQB+?z1}Zl6b!CsW+9)SDuJ4Y4^-H^rk|c_B`4{>agf!20FZb=;m*91J^R z43hil2S1-2T@ZC{#`lyW{JmMUn)GlM8QzJn^+iD=&BVkiuvF z+_jgkO$uQ*Ym7MM2K0#8KG{>*x9hzMI2?Ua9JZj^^i=0c; z4V$Rs1Lhfy-WbXV z9N>0HTL(0mxx!D%P#Y9WdM5is>8U206wDH;;iL|8fSdhoAOp35Te|^Re$@f4Dj)u; 
zwT7Cba1PJkrV_t+6Ndl@0^s1~0|9urf&2h&ZXo0@?j(dbi#sfez zr9T-HnyLNU9lyGBGDcqUw5KSFg?81IRs!k>yP|XdU)R7L9FdY1P>0_RP=x*^bE6K? zZ}0xc;i&oleR`x3sJjPnggJmq6OIr^IKqD`uG(qBO_3IkC~n8gjb6t8RzSQU!0*q0 z+wr1m{?rKYpq#>s#(n=RAP^slYVx3|+^-_Qk7kX(3aa9Nngc%s-8(-B1uIcZksrbh z;1xhO0P*pmW{plADB5`SMr%Moo+~>Pg%kh-__=ujd_c4_eieSy_*YaCMHg+KA>in1 z831$_f5z|M+Zjzi9k2YQ05?T@3q5JIzxk#U8ixJ>8M*)Dr>YLNrcNfPs~{{UP`HCF zfSZ#W#0dnjSRj#hj)Gh$P>a%PT zJ|Kt(%+1fs4Fs_P!4H7Ihkp+GA42>a+DbxEN5IznYT{Q*_zT(o2lTo+z|3%g0B$~9 z;NK4b#Lv&m4=@A#r2~TmP#!`*0K{({HxEB5RR7R%^P$rFf9rriZWPb{Zyh(P@jrE7 z9$u8!|6#``@Gm<7J_st1|7|CL4)Q;B5cHP(haH6fU+Vx1ptj6E?7(2qA9F!EKv9R> z;p&o)I@}$G_7(cTsoC11g8mCHqh)!787j6{`{EZOhB_dxybFe)S}-w5tH|K~KO-Dh Ab^rhX literal 0 HcmV?d00001 diff --git a/images/progress-type.pdf b/images/progress-type.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7f2d1a8d050c5cb149fc50e075fcd956a5210739 GIT binary patch literal 10242 zcmdUVby!qe`?iEM(k(D_NDKoEGlYbIfP{3%00Rs$)F6!_NJvPBl7b>3-K~Ulhcrqf zAt~)|&~wi3yyv{%^?m<-do#25bFXLbXRT+g^~}X}-y9F+75Rk(L=Wr-xbzB(U>9Hr*?@OkMbi<~7_ zV~J-b9|^-loEGs`213Jh6H#gUwoi8(HP1gOrZ0fAcJ@AF1Uz=S^&(+Pc(W2CXTHE9 z;YANagrHvGo3q-0?M;&=Bay`)kwO#a9-S?rF-v9-hqq*TsGl2|h@RV~Z$x5NK^$&X z*-Mn{zLV;_fZIgyMcpj``g-WTk;GpYTxJ7a#%~RzfIeS-zaK z>J@pIQ99%>Gdt8+rhC3NNS-}0sG*9eNIy875-=FMOMe!vbkWhrum8zuC^^|_jVaYK zGV^W+n1tG5+GM8VYM#rMB3fpHd9G-^{Ref?-L-}XK2|A5Ggb}k7%V&pQ=huH;1{{M zZB-i`Dy)t=E@#tEDRx50ewMR-v{Qp3}wml!xU%1Jno2s6{EWpO8I@XC}+(Mkq zZokAj=6tUY!e-g&ODFv337u_lA5lWAFdL$+Zw%Sir!)-|u9_hg#*pu82Uv9y1A}y} z|K%|L3FqcK%?eh5k07lH-btzImmD=V%Bwhu3{sN?qsq#B`h`QL7H=sXqs6`EF`83S zI4g4xU+fEI&i(E_^3w!3YbGqIYt)ZWbv{{zS0kQ}Y>4#!J4qe#hjMaSptoea&b*22 zPuMxMnX>n*6ysdtYbA>X$Ke@66!;EZgMfqf&o&ISHx2hRYO-|-ch_;(8JONWn4mN~ zTX^s}G?N__Bl{`^R3lrLQ&W0)2iuiUq6tqQGi6dvgwD9VAZ1n7;2hMj<$RReHmWKa zR3J=+sc*~6+bgckLN+2wrNOg-$IHIqz`OcJm@4j&2oWIO>pAbV(19;D6y?cw$e`(Q zCj5oYz(KoY&XarD)qfE)6YGq=LsFXZfMYsF?((XE6gll}xnEo@kR+gFvDzWkM%^Iv zQR>l>O&#<`*Ya(1vEG3XMyVM~kM}Ds(rio@*E+_h?ep@fx2qTqo`wuqgxO;@X5|Pg 
zmTL%X;XNqPt$?JySu)ao+?#$QVY`muoG4=;H+-44RaeXdM`fZj`c%o%h+$Q6akEct z+sSiPv2qt_a?^MhO12VIviMDkj^UK88xIk1w*mk}*<>F-60ZPEYnWF62eo~vz$^ej*yav#RAPk5$0D|{q0YU&lD;WA4T`$lLzrO_)osp=&dE4*<`GElN zpY;aqg#P;@80fEH06`sZ7dSxhAb)!^0$n4Gg0z!(hy z0YbtM5C9|u0hs=BzlRQt_IC$d*Sp`v?Y^6{hs)nO^E=2d^v}^~qeHvFknS$m!K}Rh zgp~m(HxKxq);+YBJp2*D3a)iu?i&1KtXgn)XAd_kxI5rFZ(4sFni$&tN6m%&)vf03J82(5 z)OKZye2Qu~o^fuuY@YK4c@99`ZB~C?Ts~gIfd$8ef!Us7KF|{=@cwzBT}b-8A(*tn zLV0^yMtlB6F-t;B1SwLW#ISn5qW|%xY!?acbd_UAh}%L2KjvX(kZ*`SpQ#R~wCfsv zFnI(FsdG~mg1K5_(a6}@#;aHSUgA4r1-6yJ3vkIhvvRyIZOp6*lp%XocWB1POG!p0 zX+1we9v&=?y!2pDorGOz;0@!F2K7`?SEn&yAfU_I{96oNhd8SUp0?L%84vu~Vo>?J zTIR*9xGQ5>xA|;`Tj|NAzlTqJoNB)KfvdKGmjjS{n_14X+%C0wxeD&c4|96D9;}^h z82n9+r8A6!XtZRq?%@~f^uckl0S8mkz zy45}SLzlXfz{ZHT#s|Cv;Q)(^le-%7$CS$hY1$u^58X9_<4*>y?mNm$xA@xx?1#T& zSkgM;{J^wD?cW2uO&xm2+v}-87UdVZuc(!~dl+ws1Zk4*IJh<$lx$JXakj%U%E<3L z@Ky(grx{kJq!GlGwsXtFICxXp2ihJqvP0zarW{=FJQpS={v>@90XkZv{xY!a}k=9_ftVyor;Jt%9YoCItF}6slACE z1Mhqp3)&b`a7J2@Sq!QSwLWHQ(}1z(h5g_7swRw#-}^UJnwjky6c%~zP(BKq+o{*m z;kDr4kX~2$z=Ihg*i>lYM)$z{cIQIjxX&%b<+T2J)#Sv|i;RZnjE;7Tx~tm-1%a$m z0$o!D1q_C{q#I=(2Ud@0jIpPHx21|!+j09{mC6^-YA^lb$!FusCCYf-Yla(bI;q}P z_)ui+@Y(i0i|`RggYd{32PsnTSfvt_?jy?efR^b()26F@)X|x4U6zc13*~Y_TW;iL zWc`;pI>*cJ$k~2WJX3Y(af*+L)DDwIO^rnHj4gl18;CT=)UsRs0c#E=bdajA=1anI zOOw4|xetcGKpGw5dOd@Oe#bKIl&&er)eYerB9(=jZA_muEA75HuyD%v zym2NKH-XhTa5KB8{-Ig!N35nU--;Ph6%WnmeSv|FJXeID`s~}H1Bwd8mlGTP+Vq*qeaJH0f0WljOdDoBm+A%v29nKE z8a>FPUO^p><{5Rd1{s*9-M%J>A(dX#u$VWl%V$)}2qvPfvQ@+oWyqqK%C-y0im8vjOMMv+A zg24EDADt3Hv;Or6EJ$mLO3Sdsz>vf(BUNXswueQ;+m2#-*AE&3z_yp?czITf}3~kD&;A> zKCIl^`)oU?6jOYpkadwJ_PVS@W8?O|f-#nF)07uJk;A>rWnZ(!1+y519x*sKk)e zYVf-_?3MP%rz@+%aa+UgdKEpdy{n{b6LOP~f3=;xlbKt@xX&|{KM?S(Q(9)bl4toU zuyAg2*^Wp-7}Ms#(4cR*$(5W0vzr=Yr}flu0uk1z&v~mF&ReSX{i0o?Obq*bM}5ao z6m*G!Kp&tjXu^@5`w&1$#1X1R9Y)wk$VXU27)OmwjZMRzoa?AIP5+sG_GP2!8;*RA zA#z7$r1FN+CLRh~B2GfvDJ=MhSj{4>ss2H$=d9i}g?EwH0sU=9&(&?^82Pms>44n`QX=D04LU&rKf@R&Ps` zu2j?{9YvnI`8#b~RPq4xN675vb1sVwM+&{fLArzp(&@{=^`%R 
zxnn}f5x>UUldzChhw?{@Mxy|CX%F7tqA5MnjA3sXFAm4Q2JqXYppLA!gH!p9|6+wOkB)O2t z%&xv>F7U+md!8r?XZ?8khM!$0bN(g$p?H5n;UH%e4-%t;=PULDH%RL}50KiSY8YYN z$DZ~tUm|7f3?TfT@iYs;3AJIJaSUT!a;XF!B&(m0UX%C*M?(h)@>P|S0 zIj}z&b$rKysihm5pJCqr+6O08GmH)P@=oXTBa8fyeT(Mzl3^iktd%Kl`8B!i_cK~t zS590T8_Ub%3KD|?U?oq@FWxEkuNJ8@6GoyUlr|9#jctlgyum6;8CmSXYR9GI<?OR#W67X=ql$5PPW&j(NOkW9!oqIk3f;YcG)p;~+RT@buwKGvTQ*dknu@X0i zYwW~h!^@Vx>9ep9PrQA3#Mq(JtFxw)oDs!SzT-Qw($0B{p}IFTErQI#u^`{4Cdbtmmr%(^0f z#k(U;>2;VQ2BjXOU1CRNPs1!ZF}ZQ0g?lbe;0s#guVzgaYZ*o`&F^I<-yFYu6uCBh}ItvsS)|K1rwZbHCN;eaT zSA3W9!pR8V9gaqDRcTL`6_hK8LcT03Z8!I(T}ldGwVdsEWe!UP(H5>fvZOpbrV{u zXyx#i6`HWsgyb;EWKw<2m;v3I z6!NCTIGd6jHnKncpXA)gmUMx{E7Xfz`N>Z@l*Zoyraifb$;5BrW55t7SUkL6*@(O6 z?X(fUCF%F!b6eEtYMJSiQR;-JJGyPO-2s;HSN;<|zHXNO zGLDbHXRibAZ@q7e`xtiMy&qIvYA(m(umeo5QuJXXWor`OTgQ5edo;Z0$E4IlBlhOe zTTo1tP&IwT;t(lN!bF|30}sWRu@`$8)$n6yZmzb4u%PRmt=O^ODSWWg_WlO^S!#9T zdH=%W?=vKiUCRq{9J7xjt^&6EpTUj%bjJaM#n`9)rQdpWsK%^?L^lqnANUosVswda z9X;{vJmJDVhzP9Rgea1%$cD>^50ZvIf|6+u`z^}29|Q)S|IG4#zEo`T!FAeJ`{(Bw z-xU?-UFPH)?h`KZ=WpI3gw7MOKAqFfX9jmOp2wkOwKVmW{l0Dn2c%htZy7u zHdHaza&KL(gO*86k#Ub-J^0+HzP%`x;l-5fN$mZ)ynKn_KzJk3yrs%-2qWauqwbQ_ z;ci07*$|5cMHy46B#Rh+6g$0hZ}~~gsBKT!6>iSS zq3Zh!D&<#0EvQYh-Sf7QOu_eV+X08+LAXW}yL(^4?`y~m<-GEeS1&$RSXv#tdwEDg zB1*e|BiULSOQQ3Ontgbl+Txs(Kuzn+47-Hj_iC{0A{Ez?3zbUToS9uk!(4I0?4t}| zX}?&-(c5veyxd(!BSt)J)>_I21(9y#01F%Cu?q`5XWd=p8lRG}xoz$nXJTES&TsS+ zSJqV}N3zs4XVaZ2Q(mE{-9IvwV4X|5H(JolHh$_q-@5BbpNZ7)d{IVIODH+Ib#QXN z)q4UzhVKvz`UEsIbOb#AQI5&wW@DeN7f(E;`p8U30$OwLp?pV?0cBwu=ak8@SKj1` zD+x_VL|Z?i8s(dmz+wvlrNi{=CAb0-;0YcH?`3-Q2}~%K@(2`?SRG=Wo`cM}$sY9m zm|E*TxP6d7kUgt~G_^6dH|OM!qk&T2d62%cW7nvu(s!hlZnE}Hchj^X!E}JJm~rRr z3AJy;VVsMRq|=Vu;4`iGXj}^GcgyV$6XOq3=)`cuZ)m&zV7VCPd5pnM$J z69=Ow)>5s9&Nh?TLAWFvpE9c+x9w+^z?9D|c?CM%znhp8dDsskq$k9a7lXX0iaT+u z2%Ji)H41^24kBIO?~eV1RH~~TcX39HcRP|bK1_O8ND61UYZn&0XJBkVVCV)=IrE8< z30wyT42@c~-AHB$*%US?B00q|+=_VhRbnaCIBZFn^i!T-#3c@8qIQa> zOZUO_m&UxpXz7_i8hV$u5FAeZjQ2$W2O#Z-9!uf+%+lwWR}ejO*%!sKMRnS0#9DNv 
zoMov$z%wpV7)50Yr;7I*2h&P`uw)|oHO>XzqQ_XGTwshB+>HK&grNo8O3$4O%Zr;^ z3SLiRH2S@3rf;p?e+8P>44))UnsgV^Q;)gIWF)jqHFQxAX2i#!uEr;|xMddn?R%RS z^MUw*EPy|TftJBbAjR+C?UgjmShVdDw}~@tV{z-zQ{%~&%bcYP+!G+OLqpbq*|Fr_ zIrOT|^>XGe=h6%bc~94#=fd7L^NQZ!)q4j#ogZFdL>R7Jgk;?xO)$%!Mmff?AZhnX zCtqjbe#fOv{TW(*;p}JfGj#W=ftl~Ms71QKA$4U<_76s#aJ?L-cZ_G5Sy7!Aclb)g zQoLVmIxHxfLKr86eA=K&gYPmwy6-PebCovV?%cL3b8X5r0Zzl*0u5i#Y7sSAE=4~Y zQ|K(?t0R?&Y?zm$7($?Qc1H={-JIxGOwLw&nl~t8JM4R+iNbCPq8~z5Dtbcptzw(8 z%5`3M95Uwk2R1u28fF&e?#d+9M&E#_F$_aS8y9Vv~v z>=Z%*gcxaU*cu-{G)D!V3d9eDeAo1PaTQJY{UXN>v&;BaoO)2u@CujyLy58{!LEl? zB0;ZU<*n5<82!%*8I+msI_~Fi+XLLc;8$26#a=!WY%>NJXrx+bVE9J5$ZT3`-X?dScViqhv_dBA}Pt$w#cW;d;?OqXEVjGmwRuJA;lA^RW4+@}oDXW;{+81Gm z;m&yGoI|%|s58(RR8lj$wPrL#WFQwBm77Fv|OcD6f+)(pmPO3<7N$Qu;STo$Z!Ao;6MgM>gvznrTmEz?NE1|AJ46LHF0-4G?NdahjdUqTHD<+9&_n0FxuoVb#CO?0 ziR}Afot7zPRKzkfRt+&jyEO?`83H+jcFP-}{UB)ASPq!Wylu|d4PH;|Hx?6}|xe(=ca za*f=$)lY7S3dQ4H?e#02a_5r79qq1WfuaJ;h99o6J5_N_rnLE^Q=b&}y~Jj8=44(C z#7Hd&k4k~2a>^UBYkc_-vmPN}d+W&ot@>|7Q&p*Rb!>?{d>j(Gk8r!H*b*HVI7hd` zt$io^kw=^jYMjQ4RQx?GmigVoTSA?w?@@^(G85d+{Gd6!Ww!im)|Cl*(h>+1Mf3YHGf8H6iV-Hf`;T(&yvf>1mi+}a1EkyjIgAamo2T~8-S-2`aU z#;S+2Da2D5=_qh&#I0LDQ>|}P6EW71^h~uEfb?Vg=Uv{KTRgBBq_z>GT)rYL3${<}Z zmi!<$+eVu^;*8N6f(MpN4ql`jQJOZ0vp*Uc(*p5z$cFZ_@VFR156XS#2-0|pAxNAQ zCljZNWk$Yk6Tb*o1}RVXaXx3)gkSQoe^CD#N@i{%aMaJ@`hBd{WryP068ioDvMXk~pI7}`rOR_E*HPC_E5 zlG>zz>v~92cP?I&W?#Z`(TW|ASoh9h!$PJmS)lO|MHe5{E<@){DKmn=uM! zcS|`WBn;$iSsqfYx%{%PyoNM2pG-R z0!0BrLO|#rtgSK%=7_M8McO*T0l;66$+}xz)4os;M<0Qvp=wW0Kb`*j|cs9$un4w?hL zPSr0|4(1O3MQ;BeQ-g4GN8Pi7x&4^{RoEXkNa%Oh|1;s}{{OYF6p(1;4FYKk5Y$H? 
zWs&ZPKY?GN^%2%6J9jkgEi80BjQ@TL3;%8d3xffF|NM9F{~HPsx-JC2PcZm8S^xbM zg`%U000E+4G|PW|g$e`T}QNB;s0F1-MSd-`C*3x8$_|!u{8r zKR{St&-k@QLhCPugU7WV;%_nop}$Kg9=bVOdsv~xT-;VLgqt%!NI(cI00eN`p-?XF z;(}gSprttlR~h-E4V&Z`ylksTT_2jFi9Dd2q{%gMc6rpcoJ=BnAQlMeYD0 zTtFbt--G_INaRo5#yuE%8#~+n>iDlA{Gr{@J|c^g`&5~Km0_lcjA9y z5Rfo>nf((J{a0Lw7q2r)6+)yz^`+4U6Qq& pozatio%7#6!OBP*^xXbh7r#X!FgMh%>_Q=8U;+*f1r0@l{{hCo)b{`Y literal 0 HcmV?d00001 diff --git a/images/spinlock.pdf b/images/spinlock.pdf new file mode 100644 index 0000000000000000000000000000000000000000..79e30585db9d043643ccd51f73d71c9720a717fc GIT binary patch literal 9908 zcma)?1zc3yx5uxbAdLbdJ<`n#Ff%lQNJxVc5;DS&0|P@ygCHd!4bn;|-65UQDcvRA zAt~?%yxw>3|GxJ=ZwAge`*+qpYwxx9S@8LM7n6#VG#7{)jLTHInRkZE4*&v?FU@g9 zL;ySra4Qo#Qvet(so??u03K;GgdH4(J|kdua4EPU(g=<#CWdQkhl0Z_aUB!7HD{2M z5PbKe7av>qy`QPpm9}C$>VJt@b~yc3%&xt(-Jzl^#;EDyEG%-1^UO8CP3tv1d3*_z z>Q7nsND0=T>woYtYunh5xLS~kyPQ3#KF(-fKErAjKi(j?xE!m#1Vlcaue`zJP6qj= zOvMFYE1dCv1>d_UcC4_ff07 zpp;6>1jS$7CdP(h?kP;SegbEol?|L^?Nd=Nta-9#rsA=GC6+jA~12R>>o`R zy_nFc+=2|~$b0Hkk&LI)BbQ|W2BSTGo(8Y0V{x=ZhTkh8;=iYt8P?A|KPjI5>3gGC z5Yy^XdW?^sh!f5##FW^eQl*+PLvKyhK^(>R-A4I13Sa0)6X2c1xWQ^f;6yt7dv%h_?L6G<3q^&gEvR~JV(h(GBZaqF+310M&9RIPKP^uP zO5GL22eRO|3G+49tL5pOik48LfU(+!SQec4YJc`uG<9hM&iLV8 z#W}VT6M~j4B^7u&zWqCTI#|fu5`6hhGYU&8G9Dy_FCl9>JhXZc!f=XNg8yifu=?pJ zY8#8PQ_9$}EOT*{9ejiEyPsjl6EO>&LBx;fEISc*<0wjUlki2pXK^#Y>dO?ZZ{j8| zyk*@Oe(XkLBbuX2YW<~Gf%P}sw%E$P9b38HS3#sRHYIkOM-7I=7;}Kmo*y&umw(_i_SfRVt&IN69q6m8taFuWuFmLW0|G+% zt_-f4e19dS>ooM((N0F)4xNGkA|h7;`ojYQq8k97M-l)KfX5JqKB5y7`o^y#k2KQC z?r+w{TtF@$0P;Jrp^eZ#e>&s+%Nc;@sgpGvz@q{)fdiNTpljzUFccj9EYR=1Jc@85 zGngdO5uk&XfB*rY0Dw=BAE0+_{Rr(BZEg#=%B#PUs0<2eZ~eEi{PJPmqeihhXe*{1PL0*1=e%ikHP_dJ_P zFr>z6-z!^?bcV<|Ui=wmRV9ZG+DQ?K+m+Q=+Qesw(h301Ii2i!HyM>>Mq`$)2?a)52 z^!;ePlLF<3&8 zJb%7_tkjsY=L8?0-etKh*I#J4LXWWa+dvxDV69UGM(5>>{rGTBIUHZ0U!LaNAR-x4 z`$*QDcQ{!~DBqakl9i+WR3gVO`n<$-EUUC}L<`CAMpQ~b4aUDmwn6;Ff1Dn33@eCkrqV^QYDhl6pD@Y4~z|DQqr8TWJR!XedMN; 
zWAR~U^FP6SZUh{@nLP4>m`W%}Ip88DnkT5&Y$tFJ+L`StOgmHM@nRt^QNsBsjHIL< z09~bVI%(0ISkH62I3W+oY1LNa4I{3V-w7vG%U#B*fv^xDB0olRDWj@9hdU;orA#f# z^^isCDi1obBvX;5FvfH$KSSEtwFp_+Ib>*U0h}1BDlrO^xJ%_Z8W}E)^v8WmqWK>N zu#_GfcR9zVO(U9i)e2XdN?}bV8UzPkO7kt<_OkuLsU_Avy$s~ebGO{ ztA;1igv|AB-ZaT$SW`unx{Bk{tgixp#Enp_OOP!mXAt#G@Sbu}oNLYVI{V^-efDv~ z4sIlUdBeI#Ww?hGLXj8)U8zr6~ND^9(LCjJSb+0coe(Psc)Le`S#_Bf?}ncKA}%=%MUrC&9Pd;kvy7- zqY~QBfqoY#y))=cN&eYlUuw-rh%L#z@|qv3espeoRpi-(G2LMIktxe+aCw((R+c1< z9SH_>RC6T}?9BV3`C~$LAbTe8KSz__{x#~sQYBAzx z3?%9j z=hjZQa(U~VI%U`YB`l>&@5L($J@&VHk@*v??F$LiPMHR&OO1Yb ztE`kG5VGiJdir*Wl|eHs0zrzpKeCdlR@Q^<#b=(+H1m8$^|vYa>Vdd#?r3M*n6g$w!G6*X=#rIu8KU9ygBHu--B`s;MauY>}MjaH!dA||EQ zkNr|X9a+6!w(^)6I0<|`S1tHDFU3`PGPv4{i;TdVNQXJ}$pTjRBRr(`Tduv9tRuS0 zy&qwphRcI3TAU~ z+yJ=_i*>|La+IWcyEaC^&Zd7fIN$aH?>ga>XTWA^eSO`|j^48-I-UzD^5+Gg$;-6< zSf`=!(Do#@?0diK(gd7lI$!8*-mPH_Q0P||?AMfU#>|-xQ&Dy|#tr9uQVWnvVSQ2* zKxqotZcYa=_>M11FIGNcbMEjZ{qm5z4{!3;j1Qx67S|*>Kkoc)^WA=9v#yrjjf%rb zJ`0cN;n1_i?2+K!jLU&KwXKu_i+$fIcA4g*e1w)q&aL##kB@?mLXw?NqGIN2zsxT^ z6L`I69M*X%8fM4MAkqO-R$Ne%Ox++nGpb@BT{=I|MuiT^ocEp*VyVXWf7=M1x}$Ul z)7zCTl@zo_71KbV);5ahoEH}2nzTKda3xyPd7S^4vBX;1-YuXUvi{29G(d&Wt3r$; z?jFJ8rv3v{qjw^$Vy&Q7K^99TQzb{G)10;lGiBc-eDT-|qcl)1Xz;$30y2nNNtjxQ zS}eg`AUtIm(I&Vgx+J_Leor9UnsYbjC}%t85;2Qt9Fx+-VWZG+?9ARXypMprnK9yE*OlEKqW`+UhTZU?6Fr)gw|i&f*!Be{wZd|~N3UDCK9*gmUqE|E zr)aenduhuV)tawRRYL`;xCMI0KxsD2SHUY6Z5qodACb8M;LwFc<%W`Dvg~2>qyN!< z+><-~Mc^)GyKLnBI~a>~{(LKV%vltQ_oX99obKjrw}+CVe07P5s3V?J^U?B~_=|T0 zVcR5G;o(kd&8vK188r(f{CuRQKl^j?p%W`qm$p|i)bdNx(}>f4QL#mgElY3jh??Ts zHNEyt5oPtD>=*r3w+~&dZb$LkvQ4gpWe0IAiSSHSKN0sWy(gBoirW?8x=)(P-@s)s zs;tsY@!kUF<#1834aC_LK8j%`q7yr*O-VUpCgQ2wu1%SXASX!Xy%o=FyXm%RBYfXi z%i1S|zBc#U0A}2*wR1(V8pb!AZ;VlUj8IRFQ2A}U#Vl1Bed7KP3s~lb40&BjX+(tr zp!JY4iB3u{#!(Xj9)zs9+`QGCS)ba=zKwgW0|mFzY~%|ZbrRf6?`+*ov>$Y=oUO=O zs1s3Ge*QE#7allHYP6k=bep+;D1*5sQZZuD8 zfH7USRJhqCVS6N3g%PJs1uyb3t8*KpaoZp}hhotLl+c5qRW6O{K>M|^+yV6LXLtsG z_FL9~JY%`LqWV3XROwl7c{|dN&uiP=AKxTwDoD&Hi)L|rHfz4AXeCJ@D;6>zdaE00 
zYjM-Eb5I6)tl)DoPkKu&Jge#?&4^i+7Cu1atBA;f*{_cxYTMyvnw^qL$M` z|C>sy%Cd@P*z*;(y{SZgC3Xi=;Xpk@(c*ONJ zPFp8QD5Osvf2MR+n8ik^d9cQU{$}P6%YDSY)vo342n(1!cuoEr+eu{2kT_--ec&Mz z=OBNIztjT$gs(rSuzVSqM`1P6LXXFO7XHy7~_cA2$qB^VNxZvmo2ghn) ze2_=!hKb-(q}j$@n{ZcA(-?$G_maGQN!)FUlpF!a@CW9)qwrIZuYKoq6>#h8;Neeq z;diE=#yC9|$*g{IOorJ<1wDP{mTgZpaaz#?IpB-MXAD_$l7qR>tJ0J}Y(liQakhgt zWOL~Xrqti!+>L3IbyTV3gJy(gB;Fc0AJeue3(%GoIgImubzkp=hn3GXJ)vNiqjd}FZ zVl)dhU$-#zN2tw8p7*oX%Rc*9lnX@c=X;^@gRil4da3Fc)sn#%Y z`%}sHk-7`DP7ar^y{CegymZ}4b5oB}1&k+~6W-2v-ru6nbq?dSd09NLSOUg_kn2ka zK=|>JY#Ldgv>Q>`_;g`id z?Ba9Tdlee{*<+L+rArc;V}q24x1jFpl4j+F4qP?vZxsXZG~ zpDd?s1nPz`hD$)!Nj8dZ5~|*!1!nC7wZF^p?z9gbwlXA{&$xECYjSvTfx=&spM(aQ zgs!kWh^fD;NAWI{qd(i2QJDXg!2WPiYz90o!z|pGx?rWLEl%-Lr%rz>iT-|Nf)7j?E6bp1!G}#xY zQ4JC!p(pRYsl07z&twG1X2#`MKS;R8oUv&fPTspSf0EcgSB6o}--JJ1?yuB_%lKaQ zsC7k-%W7jTK)ht$rEP=ML;{mk#nFe&>!rFBlPV6o+>@g4lr$mIls^7&>3vyRG6kjY zf!iJL1v`&O2-F-dft7ijRp#;kTSCy>Bn#vK^&!$>~wCDL>!fpdk)d zKL>nqBb0cFb9f^oyyt?mv%|6KVie9C92@=!Wp3QangLPdPxV<-BYQSf!YLDN`(!+_ zFS6jwrQ-g9GAjFFklr8B43EcwSlk?Kr|mylvz}BWNT;kXOGK zpRXI6u~b6A)VlwBc{;SIMOqtRTq zqKhe-60Ot3DY_TSM-H!o-0W;FYeP!S$_@?>d*+^0W+5k*5grL&##y${PyZ0lPpJ+d z=qbU9*I1WuvP|#UOb{OVEJ?bm2t%p#KyE=OyuWhS@f2`}`BRP&n+5E=$3F5F)f8x^ znaStGZ^~VKxO?Jt&#;c|c&5&2YBD;7EZ3!Md$QDm1y|qc3xbL zcWlOOuir1ZmaJ7RwXN}+5$-8syi4_KRAkpJx#E;B8@w#wD$Utj0rP-C!f{tDmXMlD?E}KPjq=PxKgE_QonyE1bAHIzm-Au74~?ugqWP5 zZ@j#3@{-v3Y(TC5yEG&Qr_mN;4PkT}99hmL1`dSd+^#sx;K;&PG3j5O#6+;WJbcft zP~IJBGEUyln->{{3ojcoBFLSi^^rM=LZlaHWZ4V~j8tl{We*OGug$Kd4YgzT8;%uh zFBFl%Qr$AyMZrBnLT>herq`}mXrP76+Gxx3whZY8+td5DV3|mErruXeH z9uv;7+~L*h4Hy-<0r-d&JLpV7X~mOXouTHCDR!HO>JWrPE+o5p187tr4mtIw|Kk4`J#>bz1i657n zR-wqaOG5zmD*yAhy(34WN3;7WI=z>>uuovRZ=cOb+^{+uYEiVE@dt{oJ?=Y}`^5eP zg5h}DY6ba^PZ}=FhV#qwmsL^M6?`a|(JRfTlGBQ1f z7#2M^b}#*~quOx?^*EfUI$a!J(Pc^nPD>HapPLvZ4qdSG#NE>2<}2QINsBUuiT7~c zA5h$^^O|9+H%W$F;^xpX?&mH~?c_a6@_khSe&e0#2pw`>0GRpQY3rK~P9D}lMpU)2 zp_;2rw!@`2puyE4aDI0;dcjBB1Fh{3oC9)LgxheZ6CUD5^&hbpcA3QF?2(-WfQ!a7 
z+PPuh1!dW7l1z5i{pL6M1C_khGCmYr`%;TykQCT6WPX<~l*&~@bn}Q|ztizsIO>k6 z?mU0fIiGrP`<>ytU2gZw+ZA|=xew)7X4_-;n(R@;)T}Uimnel2MJY+<(cuV+V>=qs zzRy0SYEOon@P9UvA|@n0^&Q!e+IdD`EWA!-gr=2Q#~wwOu87BxIomoos%%}V5oDIB z;Kh#5>r$E5IN7do8ci1KXrb_o-g3DbfA76qnM=lAREiVg7~C5kov+P*$HmoBwpzS1 zoCD)43*n-b;vAf#H5Yvv&j-un-9s`emTWB5bgEyBdz_zP*SyN2^G`Mya+O{1ee7?U z`C9CA`pl>zxh3!$5n`oPQ22s5T;IYz$?mQQ|} z-~X6rp4&XAGfYG-t{o^(181wT^^v^>`7sUaLNiG>@&)H6vsE4P@b!Zwb#M~&h)g!D z#|b~{?-i+o9NY17LC3vV3H-BCCu1tj5V+4WZiN=8hS|tT@6+;rX!cB2@WLKxqQ*2PZ{oysN7Q!>?0DW7=ro8UcPI7EJmNsCIe&Ta zn(b&MbxurJfz#V%;IRXeLBYSlQDw}LD9E-+OUc-fr6`@<9q3@ zw>lg$itW9AETKk0HFP(NB22W5R%P`|DJ1VJiVgAkULeeEL;i2PKjg7Iejn4g+c(Yr)Q?3wS=MCE{o zMHq4v#0p&|4XdI4FBH4{Zen5xnAZPMhbxvJbCOlRz6;DJuNKb)f)p!u0IWWZ=Vdn7-6tN0fXnh*#|@aN1D7|)+K5&DO~bC>%%3z2!Y{!4 z8&C=U8{>ek$kYFi(d&iFTQvcQeUB2n;FbOx_o*n`i=L)a@tmp*(EiaB`aBEN*wa|f z(sSJ8(dq!w$onGohFCrPU_-@5IPOPTrw89^{oC)Z z)6ry~rmJF0xp>6B%@wr-ZzpYt)MG9rkR$sgsn313Yn4oR6Z0SZtiS2L$h0nG)sSw@ zBUHVH;~18BRHL}OP`Mc~kvwmUSD#%it|Dh#R&di2VD52aI2zunvn+%)eIZN&jcJHQ z(r9x$4v&UEzbk%MoSrTqB;Z-BAXiSVKZlV`TS9a*nT?ME=vK>9BTdtocQ1v$`+kui zSba7;(hP2Sa4!~IC{1v}eh~H0=8rLAs_(BIWpyKJdK|K}Uv?Ax_-4TKL`@;UA*-Uj zMuq!z#Z9xD=8R@WTY+nlTAx!~)K8{aV;XhAV*_4~DyGEVVbAotZ7oq6QaZUKoQ4G! z`1eFZ;gUbx>F_*7Q>?*+YHraxDd55oU@K|xnA@i&J_-;btbLXgrF3%(bL$2e;HfL(#t)vcDTwQ$#~YI4|IeOa7fxUUAuf zhvU~1>|felGufwT5_%P?YgH0v3%@43|7&Q>P_}lDOkt?s0Z@Sb*#`0c!n*$qIC}hl zy-<&>&~+|mRwe)*O*1PAD_gVQ!nJEnGb1}wTQvLS16@ty|NcVwAb|hr_g61I^xglf zKcCX_`iR_f@rV&KmZtm?t66>yqetAH9vaPSJ?!DUid$+ z&>95Ddu?}R%MTR*fI)l!ejw=D9{i{8tMfJCMw4pfGb^*JQXs(9qyD~*|K8SDB;EEp zZt`YES5dnfx!PZ>-2Muz{|#7!{?6i6P)H+tLv;BOt0Bw`g#>`OK@e^rfYsE_&e~Rp z2MztvstF2aZE9v{%Z)^tu>IQjkI=U@&@9{-*{-uk=4PLB6Z4_#Ya-z~AibP%!k%j=C-^Q#W&lU&ZPwt*IfA o=tciks&Lhovoc1n?e)I+Rc-)7* zGVMjxabCUM&8YBjI{t|pbGl5X#OC43^M1E8)9=P^>Xgi7|JBW<@y&gh4zqPvPE+Q? 
zYkmv7mv_naj!e$asc;%_M8y#ZOum;Gv~)`BowsEE`i+!#;+pqQB{AR&5 z%0Sdv(nrO1y!U~eN4cphPo$^%Gbsi}^v!9|^fSEY#xwI{n4dtN5sDMKtMIA!x|~Rj z!N^t~>|*-Q*ZrSMsnDnIX4{q_LMjW!3IQWc+h&|ZOX{p<$xll8isBB<4@(2e1`JU3 zr_W*pf5yBU$~{T#6#B_=Qr4jZfv7s%(MWX6Qx!!E;9>63)T?k;j3xM>)GkmR-Y86k zyG~ereHT$(fv7@rPEI;kwi0$Z{Qhg%vjmB0vDbf~xd>=@?GVwyZ_O%QE}3a)M~A`> zSmSDEtl%Ayi;O>E8cz?DFm(XF_7g4_v3W%>8UmkZjq8w-fs;|4eneMWn59bqmL=Ob z@ZtK%)YRe~&I)(3yv68sGbWX)SS`5Ja4o)-s(7}gAg}w9PhNLVE4`^UFgy%5$rhOg ztQbJfstvSq-G-f;aHL%FiC@_0lWwpu-%RNWM$2@B$oQT!a7h!rgdST@8HJqR>f++Z zU8DQ)`(-kXhq}>$*g?<-XJ1<-wcyh!F@rfoTpp^SfH|t;{u%b}bDttDq4aO*muEws zE83X)(@&w&!jg13lVVCTY_pp{N4?F5#6j=`F4Uvv_NtM44*xkQHRl{$+=MFH|WP{xNNAvnq*t#BklR< zVRWO{uV)>Mb8K3*c|*fe8opwUB=Uxoc=X8XU}&|4237J4*d|M87BOY~5$56FkWHq< zy@&_KmYVk3Gy7x8Xhs7{T&mOU%cH6?@?s~K3C#p2lv{;W)6~QdPB|4ZvUN8kmSeb` z=J}@R>ajI$YMq+v%kNAW153&BC?#XNy^C=&c2?)X!*cWS>C8RH377hLxjg*OM5*eZ zWE++;pG?7d>ciJMqI9h#hT*2!BD26!K*6f*h+-ci_ZBxzz_t}(VO8~UpM0SHz{6sU zmga(mAJw(`B3Uo6t`z}3xWrvOZ=B_JgwMeo9;+xD3`d*i+@nJCM1(*OH#>z3mH0HG zMw55Km7$54H7^-ek&8MRRE2hm%)?s$;nm7wt)N57)@q{wre1ECPi~k&ZrGjx=1&34 zSt#i{0Zc6@=>?SZ_2}u5c5WEbiNGQ}ZO%uHT%;ZrJnLK}YIo> z>mtYHrfdvL-DFtTPsHtQiPeBDGVnBGUL&KgU|h>ir)6*QY$!-n0d*kJ)#vAhDy#FQ z6UJvZmjHVOeB>AwkHzi48d-dh0&?K2txJHp0)F``gQM0WG%jkK#=@xg8$1U2g7WyR z&Cyv3izw@csH(+0Q#SCjIL>uDsu?3zP2cmMmwaZu4U4~Z=Db&Cj{LrUAB*2bf+%uu z(>DT~RI!g8AaZ;@Pv&H+arm{;anNnBt9y;W<*-#-EH>i+ljow;UB8-qP=v%uUPE=O zRhu$aWrs4hEtn!DX(pKxPqVGU45em%Qz<(}F8Zp0`;YUA5;7vZl~cGHott_e*#x*b zA*G2!zm`+_dh5M+OlZnQ<*#BWio4>W3vj0m-cF0$IM8@5FB7)&A&V621=A@U8E$#|C|M z!bi09(6MJay1VU!f64zD)Hxqd1eR0gJ3WcA^2#90XnDbYdP$%+HddK@wKYBMW4B&g`E03ZDSC!49Fx{Y+G0QTx&c5FFO!rqa?NHh}KEZta%7uiVgQhW$a$?d!4- z27c~!i37dGkgD%0J-xlfkf;34+iY^V;qsmFcr-NYZfNMqwZje);T^UtMa?ue!&ZoW z%(qnbGu!z9!2s58FR|@B|6qM#x78xjF4wdJp`9ll$Nu~c!&I-SamzZ!<3QYN$T?wW zZLacZ?tW~|GT#20PBr<2LQ|Lkeg;eK0MW_fpB6gRuq0T<0@15ZOPw}D5)%G=7rf$?h#sac%}{VJ#d+fEP3sNPhU?}Vd}W50TuA2 zH^VlZt?ut$9U6ji?LTp#Qz1meu)x_tDxaB{zwU7f?(Sv~5-xGQ 
zXlN>eXShClzxSmO`N9IwLw5XJ)-|_^bB$1)XJc;r+vAQ_5ZIW@_;oj3mpmv~v@ZzT z@^gD)xiWKn6DH`;17Cf?l?+CFyr%$_6&kpD)Z?hK?BF_OE>R6O8WNMN=~tOgsAfM1 zdB-deN=wW4K;TVe7wPG09XxVXUY7t|Lb}V4@(zZ1hfbk~rI4}*I8cAWB`Vn7?9W0F zcK299KGuzoS6Jl;;^gFcGdbPuzC_8An>Sw9jwM|Faf|SCISEg zSWS&#pRg(u*6{n2RT5(F@(*isW;SLv0M}ntD$EG>{?i%gZ)X5jb*KXvz^ZI)0R~V5 zfWMq88#{qvV*&r_%c=l2voaQicmVWZD%hahYyfT!4uHWgYcZH#n7K3Hu}J=15lcBi zTpj)~m)|~qY5!V`8qBkkvAwgyqc2nFUuqeEi<2w(FG&n$B@TA8G6kzji9Ra-T&gPA z8RF_>3U&rOhE4S!E8~XA|52#({5`GzsZf6m?C**m2;k)ear|55{soHWfur&6;9K+F zQe5T1?k=rN`%6BBL<|%X_+TkI^7pue!pJ-c$TG+%$j<`=*aG?9KgAA*%Z@fq4r1RH zev|wPk;+7z1kB?s{<){ENDL?Am2-x3n<0|eW^JWnJg>9G_~p`J$1-n=8-s?|tZ7~I z#mz&rCTA})IkufR0%BUVhsT!`_K8x!Mn?cvv%_$EckKI+)AvTR{GN8acdx-dA4$rFq-Oc6ha>&VK? zPYYM>1(>Gg*4HD{n3Vd@oT+0%d_P5o*=T;=%OKwFetx%7NtRNpt2gTV^iB>Lh)Ir` zvh&G>&>6XSGv26i3PO2dH^wf36Vt%Q>WV3LQgR@-G#5#^fgDC$03tfCB@g(S_guUc0s1oHkCNtKF~hWw)!op-1oMhK(F7?y#rmSS2KTHp%5ri$kFq7hQ7D=BqLO0T9Q{XRyUts$0U_{ zG-)xJIeGt`}2OmF4#-7$LOzDFqn z`e?}n>%<6@>&qR5OTXi6wP7HY?B_}#v=v|UVRZ2{%s&5pWh{>Ccz9;ZHGW$L(I@hg* z+^nu9l_}=C9d)?1+^1*P_D`QDrL;u7Tm1QEDT zQo3YS3-(?A+#6=nezZz5#J5JWWC*Eo0YAGTvP(4XQ*=oi zsIxvxr*QZX@0lh~=gu<{L2gf%-AF80cRBAoF>_Bv5HuumdG{7s|HfzXZMDE{v)S0QT2lD!VkS- z=GsrY=F8>}-#-LNlp`}fgBBxyHG3g<3wYxkpZPOP()&%BFYa=Om{yDaPwS2@Z(58! 
zi`;%KF69LWMR18>OU?%)^vGnuL{npbn-A> zfPiV|GZmLX&yGeV4&9^6e!9#kfM^t}=oLoas+bhSNXVC%wW_@QO;aRXw)zuACjW!y z3L-I@?>_s7rCoi2hx-7Re$X{XA{DxrbmuhLG9|TI$aW;1Eo@*v@dQ~Nzi=e$V&BGk zzQylQ>n821`yJoK&MeJCMb+NTd2Ou_Gu*_iqx?fm?H;~-H}`iX|LZ$Ct<$2XxsoOe z7C!Tz={Vxo`~|0XrC5ZrNS~0ibox~oMH@aXn4{Ii042woDu?bYkxE5I^!j>Cp(H%a zn@g`gRG#Hb$+~VODIG4)>UWOD4U+M~1?ImNU}Dtk{`~fMKF!GY+){A2h2IDL$3fq# zCQ+rT4;^|MEg$4+8v4=RcWPjH3`p>P@zzN#_UviLxuHg485%Yp{|%1I^ZC6;kGPk*AdQW9v>MXH^> z4_9M)Q6~U<2KycMg#fSCu-YIJ4I1JMyk3A$VuLUp4jS`m2{aZu4wY?FHfLtV8UUiqj86U|yn5h1vD(x(fb@pB|pKDk{U$er}cl+lYa-UABdTrKhQr#}~{WXYU z`D>moqqC0jI+gFVcwq+Nug5;UI!*GVwdZsA`Q7(!@t`kM>r(ef2~H#L+dck=X-2&> zKj5q@djH#(D;8GUFZXkk&8!{hmaS#v3tt3Y<5g|Gj^ZRJT)MK@222w;KFAAhBjJcg zP6y%H&;?wYRia$KAXA~mECo?>&>M8k&G}TI;mI8Yd;;x6N$`bB-*A@qhQ{44<`*@|rl7*l}r-hH(+pj~5+2Yxz@a$-q z18sd$1o_H`{uC&*CSL|bi4D|54j1pYUq)fr&cC}aVs|#uQx$<2=xKC1jOO}e)poWt zIghn`*^9)J&*f%fzgce(3@h3(Vy?*)^jgE5VsCV-UA!DE$8KC(tbc0DP)NQFWM;%_ zxQe$OaP8{!E_yMa(!gF1Y!;z^H#0|179^T6{I1+c6dP?_$*rs`gX>4^K>HdO0`i&5 zj^MJlRVEwjM1qC*N{`dc$XO;i$&SlXJjSAkYrJB6(vBJb7e)87vc84Yy`zQb^lUGr z%Mk+^Ol<&889fX2to)($%xVvZ z>usDVhbAhCIjJVe zAW4JrsgfhXyJ8J|b6(s2RArWpIY%*5H;ql&&DnZ`hnU$)NP%YUK1tZJ^|!0({TSvm zYo_lpck6A8HxiioHnW&DA96k|+bz4BrL8KNo?qg1ULGg8u;V&`0ug({h`|(G1at@Ji!VqTwJnyR+S#1gh_XF66&Bw zP>2`XA+0|(Bs_koZh+$yyc8^{32yn3(5d5#Zlzts zQTUliE#KPyQxQ}nJriV)u?85x^T|WLk}o?J*{ef(wzl3~106!b9UCLmbNBQ38K|-m z&7+#u7OXhk-RAdw=UqUeIk?2<$*C6k>vm=2rMi-KD;?^gSu;87HDcEhYxP0g1tK~N z^%NO|sgy+#U!dTeG&x``BSpbI4Z9l&Px7U5WOf6233Etjw#k!L^#cbBf!wXop&X8< zc(LPF)0eO0PSPODpj3pk=0m%$&0d$ap`Ez6Zvh?~HF_TXg)bOt);cazbMDS9x*yKs z4l}ha;o7*L;7!4y5sHdhC8};RohQ@CSZS1VG$%79MY0D*h>FT)hO27NOz0X=IH?jk zrV{yw44-nNBaXjX*&ZI%Tstp`+I((bB3Ake{jI~Z;J0v)b=7Z%Ue^VT%fwbzeoEb? 
zejOK>anUVE)v4ZoA9HyzBKZ`ib2U?}C~I~$v5dH>?f$oII#FMM-lqbxI4X!=5S z?m$rK;Omw4;@~#vQ9VMF?5mMEec#MC6Eq2b0GZ*+NK}36MI>V#NAl?yB~rreI7rSL zv2jWoUj9To@Tj*lBNkM)xXdJfPBAlFU5qYcC$r&oDZ{Zui>4ggI(j|Dc+TH@(&MXJb!yeaCNYMknHuEkM-eWI5EKUEl(?h zN7}Yb>9)+|T4DG5kBuMY3MdAiM;sHBrz6%o;t_^t;nSF-=XfZ&&GgG3RdX9<-5t?ETwZy8#oUI9t3jf?3Z`8quj&ryi$JH3r;6s7&h5xRk&ihvVQKzZmoBp$mWJ+- zngF+Ccr3_CQv1Q?C;eV^gwe{^sudyom9MLeXAMR<|M3`ulT6LdI ztHkw6s-p^Lue6k4a)}T}6)Q(qMIHOdgF&X!ws}NxSV+rtULKUJc%COIZPjs zy&bsJeyv}@eMZ?vnd=^n^t=f!m3~9GMmL=STO2MeywHIRuf{TlWNMRh4J3NZ5k<^J zq2n<^zxct1#dD`{Gl&U5V%Zx$8LkGl$YLkB^msQ7_K@RjN6VY@mjXX{Mw#gyXgY1kRb3pfk12@3T6!3b? z(z%yJ_O>u11tBJDO}A85ypUSbgj#WpI&UKlehNS@BHJV@-RsYp9n!#Nvt9kMbG97m zArni1)MfI=ep{dW*iWq=U%I!&M_dw>#(^2z>7oyIJfBI?polsd-=x zf^+A5IL+E(v3^-WYP#PNppa?2&rBDcYB{XgpDJ6fuArN%oux{oU5Z!sY(SjbIHxiN zm!OUsSAI%RxhHdM=~N{S`%Pb<##`R#m64I`Anr=CITpEw9}dMj)3)qM=Fcobv;8!- z65&en*Ib6^Txi=$%Prgwmb)A4p$}DvtjrHjX>QKc45{G_w$Zc@+L2l`e>^gp`Dak((G2|F2u3*~wPN5&4)yKR#<48Pl zD(a8HR+%4ZoUB;Jz9ZV6Zci4{uvuXlX&349nxTnRV)MfFjIT7aX7!WkG#BzBkV9EG z6Db<9)7*S@gPk3~uIp&pY$&IwrTGm*O^boi3+?%02$qQvQjS5Zn%Cr1Wh+j-29|-t zP)oRj0b5fEibS)MK$sr(cc|j9JH2L>hMV%Zch)D*JPMOuep{x3T`HnxZI86IWIN3$ zIs&pI0MP6QgEsa3F}n2m^23?iCGyvbuXbN6Fbjp;qI|(@v5{iQ!~%+AYN$spj+r1M zZ)y6T(SEQjE|W}7gMmw9c%J|`s5o<#HvpG6$`Wi4nPJ5szJnALmIl>vh-8VH-POd= zVjv2f4ps*aB(jw8_sUkfACJzvG!z-Oer>bNuAQDml=a z&OlD>0#qBra~|-nf2++@pG+TRFmTXZ{%ltDyqMIoKDB0Z*Mqr2Q=7K-U3GJ9V@+dS zQst(#RCA+jg_|Azy9km4B5l8dho>Z5`hWujRGPGU?{&i4E56QCe_y$WQNz0$>)UHN z!LKq>o)ao$1YK`#?~QLHX@=vp)DZ}~Z?v%$J_VanBpUa}a>4#&Aqhp>9GQAxy5`<+ zq!W!{#A0;e$R6ym`^JeN9oy1&qwmT>60)xvJ z#pGdi`lI>N>+GSYoDql1otF>hjqpk+SYbQ&U1T6~Y{I5|0IAFeVUPnMoH?2>$;hox z!s^3M^e9Vo?Q)V%r2VHn9rzmgUo6r1khhHT&%a;2;(%}vTn7UW?w&NSbF{{Ri24mF zJ>O#-4hPs6eYgWp3Weik%|a5;%)LVO^DD6>%zNgTP9uWKbD|5ZoXESflsdyc7R*j! 
zuV=UUXRdb5ZG)OnjxKh#Ic^YyA~04lRRrfFX428}{3V9e81k9TUvn&=pNTn}n^p&2 zV}Uk;%Hhuh&>=!NWP?^7T=X>HRxFO1IOtP_tx~bIey4Lw7JUOHrz3V- zcm~OHGnV(pG|~Iw?8o4A#Oy1o(aC2jQbA6yaC0@MO7HU7uEqnE&Cixyf=gT{d`VXm4y4$+L+B2||x$ z5Hp6%NOlV@s+QlkS|o5>_!UxpJIlg+opHStux|9aOHlVd{o#50M+stgrr=gwPk7PT z;BN254%z!wzuTY80nefwKUwm<6rmiJ<_LWqGqO#a5H}B}#wGgDGHHbQlh%RM|J)ZX z1xV_Gc0nTlf=KMOzeUnmJ)Xy)i?z=(f$4icTY(=^#>>wRGZi7b9Rt25{aViKfp)lK zAiRrL$O%EnK)d6d^WHA9VzNMgL#!8D6lQJs7g@r*Vg56+SPe=N!fbD!z8p~shz&vJ z6>;DCiA>MhN7UPT;~czLgmoewvplD;|B?<4oPReYoEVMqM2mbr@B!&;$IZIvVs3$t zw=TYg4~ZR;65qnn?#u?AmK^WIHXcuj=cXtw3eT>gEo0tD4tal383kpQG;~e4Ah>4b z2ayenl$(|fOen&rctXFJ@%3GRLbiR?s2OU;46fglBx+G9)ovEE0VfLLCWrHw-_$$N$ zBY4=i71EvaV|Lr;XyrBjTuXxeyCm}%Uq1Ov-|SBuJ7$NZA#<;2yxbPj1ySdwV>uP_ z-G!(@9d>5!{F3t=H}nYtUqrZTt1XC>5)9(>(hV>gYJ^Q>ykv9IlPY7 zP+uH8SDu7E3N{al6VQZMkVYkV@~q>9n|EHL^(0 zU{GirDQi&Z6eW@En4L4cYSg8zvNcPQtgEJts~)l-t)OB&89tF^Y}BV2<(^8>RIT`o z%LX1`Zm7P+X_Tkkl#5C~Q7iVQ>}+BhV`8R|<7{V=mt8HL(km_wW>Oh8$q}edT9XjV zFv(#ek|mvzNl^6V2@=p&!rL_yfusLp*gI#8UxIXUIQs=?LBbb{vhg4-UFUqmH)8C) zRF?2TbNmjqnJ#ET-iAia)C@HtgB~^G2UQYXLdFIq;!E7p{6zU|0Q*Z^&?+e!y9mvt zCA@Q7h$Z~z=ltUZgWO5pd`ZLIanY~#Og?@Bm=1=IY2y=CeKr*b@l6azvrs^bh~;yW zqMhOsxIm(`ZG8svwThuzbYl+q!x0hkv7#n@2sAW_+TK2)C{+BTqh|Q=)ddVmo76fa zXOrRygq4W}uXf}Mf`@HsK0n8QnIi%cj(CngJh|;vq*2xU=^@tz}=US8zU zeu8(oY)8Z|DLLLpk5e6~%2Ih?i1797x0+2WW*_Idi9LLVzwJXWoP2n8aW{THIg``@@1@DZx}e`BZ_B#zqm0k9-4y%9%Uu&+f>Bg}Lvr(*DZ%I$@o z^u_B|0(-}M{ft5*R4dT3`oIUz*>JWF+D_L;qAq!|NHq^6A(Tv~oJ1>6^(7}NGZ#Kr zx9Hqw$slP`Id?NN%ICPV+YlLg7Y~4AUM&mGLH&_F^0Iy_4B@#!TlUHEO1!Z_7Geu9 zf)3B8>N(IaMPbx=!W){Mb~Rf0=(qSZ+(~%b!+GRv;o1$4;icxIi|@03$ZW8MuIrC} zSn#tsPhbjSU*>C&oX62kwR#idIhGyTkF+LcBGW#0lW=Cb)(%tPn9_X&@SB7&g(IE_ z3%ntn(wJeKs_GBaHmi-Wi7wMcwV|KF-A%lpcgzcPip+i~jbx@!oiNPktXNs%#5_Xj zM9Nq9;S^Pd>9ZHbAxdX4;~yy$n%BJ$?kKG|$i_D{=NMo!yHN3>)Q^?z)%%%V;>nb5 zqkF8n+5b-9ueAJ`L3WP6@n7I0+WHrA`5P~N1a!s0&ZbUQ4lWR#;a&-2Lo(H z6t%UqbQs0K*2bEyYG5ZT^S=!wjqR*#p#ZwyZGc~m^uJqejV)l9Dd=xg(chphGdCA6 
zfSH|x3jhMLaRPuqHr_wMT^Sc+TPsr$dkb4IfbCZ|5ogm!aF`dw^Gkf3;G@h8V(0v& z5Hog=23uKJ!ti6*Fuzo4E?_%N0MGCKkAn1H!s8NQs4N%+cm!_$3FJNk*Z&=k-^00@ zxcmaG)nQQaW2k;y;VcVlWD>ixtFP{E=x$if9YJDjA0MC(=Yn8nw2N`F;p832q3Zml7Y%H9ZG+%d%t}rri#3*V^Mg5J}5F>|A5Ebh})#H%%rA^^7?`XDn)Jv>Y9K(re=3Fdn5nk zEW-As$xRvBO^2?6U(VjX9NkVF#aKRH;he|TOE<_4=dIh$T%C&Z;8p{2p}XSB?#Duv zk0-iyszeGuI*zc?Lzn=pbNQYlLPf+5}1!W}-P|lUuk&!-3Wzr#GKc6xs zj}AfR-FcyRd~fReNn7tT(iZ?as-!qZ%f(hV)m;%9h49xvKI1UUDz1v}M{jAXLwh%c zjfJeJ^C+lXDvvgk_9>zph@_7#nAkWq!S z912st^wp14shQ-~;6eE=e-cy)4kvp1xnw9Or`$^RJ$2@Am5?SCHil~Mu8>h#!NE*!+zI3asV{+ACm$)**-C>x(JT4N ze^PQX$Q~i&Q<}YA?$Tz?K z-q)UL(n#< zPUDhjLj%Dl^c`Ju+QxLFFd1YY;?u^Ihz0KuYR&4IKm(1Ad_MMIDzZk;&=BSnFQ)9} zpZ2^%mA06r(V=7ZM^Gz5Zrc~7@uBIvw)|gh`3p1gb&sGt#!A(c(wkuaqzozx%@K>roZ9a7CTX~xr$xh7OIt1^O)OJWtEs0L$`8;2HWXDZLEc6n{ z|DGU_*nftzl*p=^x5tME7?CY!!IAZq3dZuvj`)5@VQNoMS%elvVBh6_o2qtqeQ^6$ z4dZG0UW%tM6D!*D{oIE#AIY`1oLGxh17 zw-u^Z8Ar(HQuM23Z<#L?jUbs8@a}q&1b*Dxa4_~r-FF=pi}^f&74;jmwZKuIy0@iW z<8e6oo8b9jhE6@+Iap|D=S{MlSnP1g%jr!b)6FLaY-A6USfvdb)A4m_QgFFP9#5u? 
zjVxYmVQ>qezzD0AsgHJiBP85}4b z;@#6Sh!OtL-RxHGY3*MiYuZ@NAaWBVkih=`w2aVsG2?bl$-xDate#{L}R;UIJ$ zu&mHzETjoL*q#81_&nd+d=(3O61C`s_Ejmke)CO0dhw8GM!~mkbdTGk?D`yUW)Q>c#X8vsa+P z?UR^y5i`eyIaC;YIZEMewqBE@WQ==;h{+HgDmEk6p(mXK2-*bKUzD1UGNvH6|2h@YkA+ z_;$D2-!1WXN5mq}#=*gJ=Wu7T|GgRfTK>fxZ6%a!5lcUSpZ`ukJXj$>gaKd`c?Dnt zu(Ew(@BvdTgYOd!H=N5DIT1O#vcIRP9XFhJ{%kLP!)IWn!LPWNPz-%nMFDd>nsiG!?WuGq5_ zJ4KRbm!TU#VquZBQ>140Rjbfs4AwivyxlQuIXPdCA6_v%D|A|xHe2tAVAN7${BE+6gLcw#)nxA2_enKd*^XV1qY21sscy6J!kg+2JOJ-424~z2Y z*|h8JIExQHkvu*V`z!g93>Vq%N*o1Mj?P12Rs)` z-szPU{oun6OTvpM4K%}mcKeII;Sl71qrj`tFgl*LQ#;B%;gHuT-@bIB5==`IMT5HF-jn!vA@aeFM z*uR<{xuy65kJsevhkQ$m$1Hd-IJddA&;1ySGe8+6kc;cKa6r-OBe$N3SdyY_S_=P&KZC)u$Ar<-iDuA|DY$~v za2Z95_Tqq{jZu0nrbV>FqJ`S^Ye&uD$U7>n$9udI&4d!oYap}4_JW>j6K&Y5YA2E6 zEmrB!Hrg}P!i&%!l9T!&@&fLWEb7B`77}z66v?rIN!lk{Nt$BIF@8F(8#@l0SGls$ zGIf`Nt&=2p04(RXFR4rOWmg{zn=gmv=Y7;V9ri(n3%~X{q@k|eUoxYYoU9QpHa~V| z=eA6sMlF1XB4xROTGPHGGq{tepv#K;TKJuaMS5tsTK-D9Om`idh5worG~wOPrIL=+ zte}u6eZ$_w?p=aYiVa8Hey(AjcJ>feQ-?Y0=}2V0Q@5!2%g{O<|C-N{=n45oHeTOl zeDn+Yh|Q^VPQLOgwV?PB(mp0ReS#&@Pxt0pr!04GKQ2VjsmU*JJ;-oV{ax7tQuRw# z%ctcJAHNO4BpQKz42TYU4wcfQ_+XKgq60qvrmr)7JeJ%N5T zZ2MILRdW&?ZsAlK6ie)F8~kHUX+Yri+9;rH8TYv>i5-n_?J-n4;m9^Oacj@An$uOq zdB*-53lEQK7GT12+1RfSMn(2muS1oFCQ;|%hib{J`Qq8n)w@1Vo83{%dC)ru!ti|z zi-|ByN;`HVcSvdEDE1E?_;x*Yncox(PRQNHp+@VY*omFvZ)|5N3qzKwWpYi4+BS5TE7Q^5qfaP%G6R79u)xk=ZE_)oVX52diYY9ydP8oSR#K4RTd; zn@o3USH4h0TfgVmvj%xiSRvNT(fi>!^@n6g+#1FQVF)wG>&Y-m+OHnu62^~0OI5b|lGI(hh{nK7Txxn)?1 z9z`E_s>l0=`j~0i9+GDvA?;PRiEl|*m-Zo-*X`mDZE{-ZIA}!I(+8r)LzO3wiejC7|kjs_i)lRn{jqRhzT7&KA`hM81#Xc*OJs z`&-mN;B9G7Cgsn?VG0(D(NlqggCVNjXJ>w!(MmZHxYjYDEDW52CbOsC;O<9@v4o&j zha2P-7k}8884?-xxB2u7GOrwwQ*`Wavx}DKf!!D7uhmZ-U(Ax@Te*kbOYRwB`q5Xx z`AP+~NS$|&D%w*-c?L6M2;4qT3l3Fh6!9?!iNd{DcnRi8JQ)vH-zNU=!1T@S^paP*3@P)-#8p~&6x@5bU{~ezbM4pJn+nO=94+_n zKa$UrM|XAJe|eEfoOLq%Y*B1?4g~9u`4*MiR=!T;wojZ9M=of;I{aq24Vg~1kUV-L z!~e5G(NC(SO&7ipkMfloLbnN%(K6w~Sa&6D7HsEH)Hp;X(I>KOE!#7dz9dOpuzcge9%R|MnigG 
zyHN9UCU&PlCtBS$lc6lpQI4M^tj%WGDX6V&SZ`f#d?~qYsI8=}n$wBT+FsP5)qZ8K z(`>*btfFJeX8K5OEjc(fIHgx0AucOyhWAkFL~2*+Hp-LETi&-nZ@t{+#>Ug;o6U1> zYjaH}buQ+z_v*qDLx*hovI@rIuOdXs^0KD}!-`~&u1?LXJ-SMEYq@WaFCGlcMy)v% z>Qq|&==YYW@Hkk8*5eCJSEbtz1%Jp0!pHee!Jj~XX*pTrbd>E@l3Qgk|CwrDE?$sP zpYN1XUJT^rD#jYnnu!~cQqE&5X{@Aob#w}?Bq)EuU{sd$kPVNq-;So494}4{Jg%c` z{NsyI!cNZt?sMRv(IeN(XF(qb2<9+Hka6OI-V$(zY%nY(NgNN77oj;{Xn!}`9&^5~ z_e|B5&!yV|xmjP>%`kGK2y}X?waZK%br&9BzZRUU*B`I>dQ~G&L`UnuC3td%41cpX zcN9vN)aKvft`bSt8Gr zl;Fo@lQ&W^S~*jmAnDbz79m)fkS!j)M(4IM^k%cJ6vrZKoa5~pYvtL*P)0}ZrOUET z?%7#fuDL77`|Zeu?fpWonqazM_%p4u!Hi?=nm}?+l3?WaR~g@i*L!@LXfj_>%U%K- zLMMqvCnpMZml?{KpKfR@eQ|MWmfBcxL33f2x55x%gNt)JmD6R@yh?kXC@5081PYKG zUjj))i<$&vh_09wi={=bs#4n&ex>!DFj&1Xj21tlhu6ue?xb(XxZeyTU&kel5NpeF z5FXCvQprV1z((++9m|^9D6sW$lappi32^cY7nP6Q2YxK_VCdnXb?fhm#>y-e4KErh z$L>f|J{%hAy1tG@eytJzP*?Ke`1{Wy*JDu*s?*^3)X zZK+mJn-&zqLu{aJSW0`f`ks^tV_P(bH^;kwyWsW6wew3HQ%4RXme65(qx|ccYCopu zK3^WSosYZDrLD6aI4AbiJB@pC7bSD!$CczH2JJ6C43TdHo|%t-<$$VWT#R*NJYXAA&?UKab>Gxy|)#^^G25|W?n;N z)bjn)iQ_RH`pK49!I~FDI_(m19ShDI@4c3LYJxMeC2tNUd$J{s-Okd#U!!5~zy+e^ zGG1;}^~7yX(Aa50F2jVh-S;H+ZP;cKte0EW{+^JPw*qU*0UcuBt?U-F%&W~Z zRPA_>MKkXwEf7MtqDlwn;u(s}NuKshx$?f3%8p6d7=C2s%hAFkl9HZ4Y?wN3@Vq#0 z)G&uuRev>J!q7jvVk`vC=_)$gF7G9$PgiMry{JSU?a`~9 z7U6?0NnFTV^?Wqtmz0n^%|*P#r)6csGuz^utfM!_-}!A1mJ7mH#UJ_uUjF>hu&uJCQq3lr-#9XM<=UiwNB+R1G;t`mcBBMe5kb7=W~NNB|I`iy@2+R zYR-!#vxH3C#cQ1$AMQ}y_^87qNWaRc`PS2xZp%bm33{k)sam@75YLkNs{IeStWcJx zag5sv%SkZ;pXSAPlovENh~jk}CV-*@DO@Tg1zAXeCd?M40)S4hor8eSPi!v<#Fzn8 z#_1U^d!qt@FZ)z6M==a2Q<`Nx8Gb(DYU64Xd=_q6fqj%U(o~~QMTAqQ@3Y0&scHHYo4h7qOqZTdr|@)MR*|D^>=jv> z9>crtR6NDd+BV)i?do^37voX1H34Rxgzos$2Ga&`!sBGwP^={qlX@F(GjA7fql~(t z+M!Ch$=p4w9YU9%vP4BZzIYOWB^KJWohtNoQmgXh>1P%|=P=^95Ft{K_MGIri{AUo z^YewEneC7eZt}q?X&pffkQu!nrP)XD&(%EYIj1YxgwWb(_sirMk*JZu(>9?b9c#LW_k$C6mNk zN_$`SHiq(I4D_`*a79?P%2w#LbEeISC2go~yxJq!OEG>`&cXZqQk3azOXX-kHZ!U} zgCQACoTzHquypxT$3sTx4BO3zJV&Qxm`7Rr+o8y%NOI+Mjt>5k@RsXsTjK59vgjn( 
zKkh)}`kn?^#6*Pk2lQ5NDo{6$LLv%H(~X45g~ydbM1dH#T@;1B6WP1Ua1M15`$#$S z+eGtWec6}dNr?jH?u!fg|a_neom{`5Ox*4I8$upCUDBvHT(T16bz4%Sm3XW zuv7>sG~Eu88^*$SBJVf)d@>f+WpGH!1Fe{J8V=oFi{LJiX`5XSFI!>0yvOru9>8os zp>BpD7!S{bu?&tot zUib`;N}hI-UXdo@i=+{j-ZSDBA$Dtx6;sT7K)8TyV`WZ}AU)rzq!5r+!Zv418jw5F zwmML>J=~~-A^jwDXK| zGn!iXyUupbV}nH_N-AKkM${xj96u{pXBD=R0&EAPN_g)Nhge72>t|h$nd+Uc3#%Jz zPj6ZVie$n)XzPNsY$6Sx1mRx2rV7%vKlDq(DvI^s;<&lMUy0B^wFmjNb%BW_Xb+8~ z1a9wPSP^}&wwSm@+kv-V5>`;r@5f0AJPc1!V=W^EJ=X4k+Z2@4S9OrWj>ZDks&<+2 zTK3l;r6_%9EodR5gIuH7gvkl3sb)`ksfAHyQK2W|3AaA*k*w6ccHv-^HEwOkUn2KC zp?sc-HuP~JS+#9S+W#>0;#TH+p;xkZFttQL0B!=Q$TLyLfE)5FSMEw#k25sXx=&3# z^(~VXXV`vmng>e{txomG>Tvy1HNfLYMl@imZ#Dy;cdDuR^MxJoZu;LFJvvXl zKhL>PeqY|M&p=+wwGP5>0NaoDg|4=|(R3Ka zlCZl$cXt&1Ks9}C&t1O#v*p6_{F|cv#Cs7m0(*K2Y;wyIZ%opi?3%p=9DeFjWESwC zY=B^rhoq06^Ld2|bHVPTokn@RtNJfG>t_(Nc@*PW7vHZWUmWC_r(IA_VasPb{YEaP zkg)!J@f!Q}mZB8x3dU#lb~?L5#fN8 zcommdO{zu?F8MMi377{VB?%19U?tuQlSp4G^+IO&B7jmR_n-vhs{-3x&kI-J+{0x};P<9cMd&JOFoV(r5kYq( zC8H96XX;xdB{lmY@$UQKNC0D@SL7QKMhsyo9HAHQ3^F)~zJ{>ckCrUiO9m^eFBuXA z#1Z8+OQx_24+m|1v=n7vtiuooK&W0n4=t{`{{SB_PhfEI705r(;XB~R|C4C|&51XS z@g)<9L5YU{nxi@g`VhGw8cxp-;eIZ`cRuDR1Kr*ba{otumbuX%)Gc#; z$%%;LA0dDxl9C6cc7Ta`8HI0q51vVb-aQ2V45jhx=sVYIlI3~4;R-+UWiHudjm6XOIqf z_Zdy422f)^a)_?7Hcgf8IGR~>G$nZ$ef=0cv;`+npN@Z`5o-b|bnQ)!ZADuWS@Ly_ zx{mnCW~F5)CmlHSBqM^kM2xT%$%&f01luXWO|0QHQ*L%sqF3EZ)-LJeQraT6xJ&ad z@~^ZFa8~ZCKV~!tGy#gg{!BHL)|qygH48&4##whw3uCTU;Ic8VsQt9 zq!mO}#2H1QrjWOGN>FPP!@nKGAr>a)P5}DfZGc~m48L2=Aw~!u1Nz%l_%|?O=H}u7 zFtc-T0YGdBZp6k0cQ0SkK=njhgHyyt(XQyZT z3y3KrFw|YDeyPF`8|W{*_Wzq26Kfk=5o3t;p9IK2{<7IXzq|JDgd_U@_fJI(hPb)I z1ZD(aRW*UVg4vk-5q^bMH8HR?wn2b5cDB1={GXm&>|B69@BfqE|7#m34TT9u6?Vg9BlMet7}`9Nb_4CmTC}1IYHP1qc9haw4w8 z{(DX!pdefYW^#A83*c^n`DfMsJBjb0rOmG_Nt+nl<>-!UD*lZn?d}-nKiDAKKe4BR zHQc~XA905by*|Xm8V+D%VdG)}0_crxZ7pqhSrL2{p&D63ER9X{ZCK#eMhw5Fxd`I^ z8(R}NO!SV;(u?wffFKSaH;@YiWM}8(VFZHdfIxt!c2K2M9up=k5W3{n6OCxDnz0hsMc;0JHzr zfIv0`hW@w4#>s)m@V_-MC-^_-vVrf){ck^RFxP+j!VL!hCk7Yhf=m 
z2M9(?_Wz-QfdA-=tu+Mk)3*L~@05~>GxRQ3cZ--J9F8dZZz_CeOTi2gwY@9+uc>5X W3$eESm0cbXCpRVyjhMVR=Klg~r>>$zE%I!=xc2#|7dRz+t$%+A8e5+Hz*H1Plc0FNBp#tDW*ZEc`VFd3LB!VHEdE{^BugoHtD@!Zk|^i>hF zg2c_6x_dgC6?hgwnPOP1co^H+ZxND2Z&w)An&B&BtUq_p%?s!16XKPo>sd)Y=E^t( zb-E*HVMy95mw>Ri@7%|89|mWd*B2UD%9}jT-!h!A?1*&^o$qujuVe2TyMCRE$Y;Ftc%+?mBXSch-WJK!aWG>?b0w}negyI*_S#GXi}RN62jej0(#OIhHr4Udt>Zx z$P+CC{k0vF%rDNfE&RHTG-tBUPKt)6gDrN)v<^3ZDwWf5TNrYXPlzECUN>*%5t(Cs z?kMpeTI$H_>8k_J%@?wK0H;j&EIRaN)AwIZfWa$N5q} zMG=QxtctFJzE(8h)OlqcG9*qwNkud@E7&RQeDm##$UvFNT(BGcsYYeGFYb6Y>y8^v zFU~U93~1D&aMyL|4T1P=MG%3K8BJn#e$lt82`O4pe5puok)9UY zp+`Xh_ZOL^q9BVsjXRfKx17mW*~cs zvMUv{4=|-khkq%5Dv^lR-V_I9y+h?$h!YD6BA|?#4d3#`rtaAIR`AiUx5Wz-%0L%D z(R(*MlX;2csI-Gg22ir3p%}_6$vo)(oU{9*zSWk`j8;?i`dp0agfEWF@RB4%-hv{@t=E-Y`$HCsGhr(I#5=lGex+M(U=uJ zTvn?y#Z@(D$4IqpU>z~n%kot_{8dxc7xu>efF@xeIT(K{iAgV#UKY*03ZSh&qkHxZm&5-49;u_{jd$fLx7V(^ANy ziltc>CbkSZZKcfk|v9D`QlDPMTs%hzdb(2t6~y6P9|JRh2H{*HO`L`;swaC2}n64L-x4yz8=L z!Fo}*k$i8TGsS}q_JlunU%Aq-wFH0W_+HAT6yLja3Fb}tfzTNa=@AgmYa3S$;ui%u z{9jzoun)PlhPP(r zD!8I|I>vyDN(PT}0WuY$Q-hZ{Z4kC^tU)cb(!BUX_tB4SoSZfCdZn*mA}kwMTyT~p z9dz40pd`|2T74v#*{aR4EVYB_c8yT=C$Wl~B(u;+9Wxga#?s5Md zb(o)0+AT{t8sM3cMzywywf1d4LncJGahMNhU;LX)R;yxGXQI7DxnMXKPAPF}a9w9W zi@LDUaJeSdk8}UTWyA5A4Ls5Oinmfs7siK^nMhJZiH0I=om8@^PIgsEi3VG`Ms|$1 zVWRCp{wN@xx9C9izPaQ;1?zBiyVf;JV7lSJqTKn_%@)~x)JRU zgq-72FR|;CMiGqWFria7G4AnnF14|CV;yjot`$tMk3n!Xc0suEwbT_4W@q+ST|n(! zSCs2Aa=k?r5|98t+3uT1*{N;@IZ)X6WC+_wz0FMUL0tR3LfNq>? 
zK#?%ixj=u8Z5F_%U;M%g<8u8ZdH3Mr37INSeY zFu$GLsDEaog>sFA+Bw=^J2Q3vBP#-&kj}6_EoqdM49o>?3e%LAx)%SMt0v45;fyqe zIRdWZrumPl38C8mt8jz=9@hV@a9>CEcZCN5z#UEWddQ6d#kdKLZ}^N z&9UymK2s&uFNU3&&zr6}0^c@ijpG7S7n)9nJQq&BYmUwBHdvgcI|>JoND-K!-(Jpa zHQO0cRu_v_3wC6upWQku);YC4%xsnlK8HwI8l8;d>cr=W!DjnY{=lTz>YTH+E--0RDI(f z`&|$BLu^y+Aq{$neaniMd>1!X*C*e5tm}#sNQLh|rOGA@TFxWstgCRCR>d+8*}U%V zN2B%0A2(XJH%NQz_8x5pXBrkjCDl=2=^bTJSE2e2FJ$=Vgx8O4y#WE=*A+c7x6haua ztT-@8)y)$-mZ2lcLmtNedfj<;mXJx-ip1~%hvwlxv-Nl;-XlFsInrBy!pnL3Z1 zoiOVm_F9*aja4{^?TeV>zS!=s{BtMHp6ChdNV@nJpeC$-tljqbs-ci*a35F9{U5dU zIg-F`E)4#)3O2L4pzdJ&kk>9X99{#P!7lV+4D!}<`g6_qyJ`o-r!T5pbg$568Z%?)4v?ux@qV-BR zhEShJn}h#;#beKSR`M&KMr9(jOaIXR=XANPj0prcAJGz z(^y=d5|QT>ezm=M$Hg$B7+GL;|NAakE?=@r0va=RWlA4j*{3>RF5Yqq8uQrHdxcMP zs_$V~@#W*_PLq+Uva*fLo~^4jXg#XJXNYk~gQhv8!P89BV(DG|i%bsGu*WUmK0JMN zn?{jzHa{gVo|1!GE9_Ux2S&tNk&a757GezH=4EY*-WpDg_sZvZn(3_M{s)7v{>qk5 zFHWAG&`u9qBkYNeI;3f%N1Q!Ga=NG~aUI8#jvOl6Ek8Rn$l%O4(Q8eT>@$}Tu<3wj zr>Nr9Gv#E#r3d=d?8Gne(r%&g0NdH>G3plk4i#nIkg&Y$QZDIbYhbMV>aaYV#yh2c zk(U4UeUGPhzGo$4lG-^?wkdz7$hcwAFkChH9c&_Shc6UPBu+MzK4j77(_-7-Xa26^ z-Y2~uNbNk?N?FVm6E}GVhu8;wYF4T#22Ug2j-iWNM9*6c9A||-+rSjc<~#6Pc)BVZ zF}O8qK4v52Hw(akEISdGy|LS!fB(Mu{FwFITC-QO$7*@ci_v-SGo%#f98hQUFSltPu-?vH53&|F52FG0fl#(vb8@IbZu zyc28c(VAZh+VqhL8LNZBL(HH++n2@_1L}(vN^f*llM*tgj>fgrOWZk2xqodJC$91o zZdOZZ?lhTCBpfwjzWyd&|IT}E$vn3ARYJhf_1pIod&TjRIb-01NjLv`W%FM0O9;-X^cG0)!e-_~_^x-}h*R${jfs8Vso2#iXHOQeTWg!$lU{91 z?U;JTV+gXHPMWE9Jgn8-zHkS(kHWrHk4|EmFyHO77_-g}9T-JqY0JcB^XaLI_I(a9 zZ1WpI$nO`^mXMy2l21Que%(h!Mvks`+|Mz4s#`B@Z5sYW$D7AEzV2vaQZO$0sFIOgV_CD_eV)uKbJp6 zw!YC4HzF_hc#bVLb+I{l5VE2sc=>gb%UUpD;2glQsp(hVxpy!MH2zGgSOs5x4-*Y> zNSC8G zQ*W4Ik+44S#b2TL(dwpR*CcAb(X^1cDIOfOSFqrHez*fB+@5Q2Ecc2&@9>`Bcee>Q zp7!uO!3T9e@^=3|%y40~xvBk32*dU(37ynJm8Ozw7zp_R|% zrN4;fs9`!F)5VC@fdB5){cQMG0ehtAsLXod5)<8bYEluZVvlq5vWlwn=n0pMUziu7 z*@wYH^tA-d`hjuFg2e5Hn~TnjWlPi#@@Pu|BNem=se{kugu1gwp17a7JyHS1!>wk;GVFa}BaKA!da+P5@IZ6}L&kp8mN$w%6}=#`9&3{!^$?meQe 
z8S5oWCHp1kB?SUSJlR*1%a>&i1K_hyewnj}pW2y|tvO2#JbiLXHZPDx(c+I?5Ts<~ zhA)nFi=gJ>&DyH>Obk!k)?`$14NR0Lj+f8YgRO~30CkENhE0nY%cXn;aWxA?=VBUBgGW43hE@-(yT!+K_^qq> z*=f$u{7$FoB}eXz@`Q5t4!TGBrtfIpg#H~IopfUhNY=#W7SZ6^12!C;O@*-WRiv5= z>Olpx9{zPD1xEYomf`H=bB4H=^YFS90_x$snh=a_t@|d>;4B!sd5D6)r5|x@)C+m+ zH&*ge2e<*lNAxOaHWeXPah!{V0*WC#-o@h^r5=fc+aqTs;<^$>2?8Gl#tgo(JE>O; zj?KZ13ZGgUeJipr|6=Pf{xot!w8_}|Gj(!EvH*q2u%5F^j`U{L#x_>tuw7NEEk*nC zJ7$QeT3vB#OKWy(WhT?d6b`ensoDqflgX?7%G5U0HncWuWO!SFgQ>AS8(}K4YY#u_ zopZjZ%${^BaLxyh3XU3Q^9aSKQW{X`QQ|pvH`KZxW4=b}!ZmAJrZsHC@yAVSY~#gt zX3ZDY+$PT|iUr@!8}KycO4xzzY#avKa9F~QncqDjr2WAk{Sw~oWoP25OX?vs{XWy% zL&Yw~`Mj}wEB4vUc&=L`H^YmjD$PqABfIdHdi~3qWg(BCzQ7-aDMR@&vYH-6i-Irb zx0Q-&HbME86%rY1ea=w3O==rLsP}zEEwfxkymY0($?;)kr^~f>do9bYMn-X-6v4;-z}T=rxD_HUhzr_af+UQPDZ{g@tiW8-U+c~?9poL(>}wc_gnV;dizjPdG0 z6QOho{l@ihV}fL{m9IEw)4xzby6nR4bK>^Hf(^o{it3f}DkFCuqp?KnVu^>G4H-Lv z-QW7>`A_4hC!q6}kmY>*!{ZH2H4ST5^!*7ch1vF~=g-nLOj!f59}rS`P1;Tbq&@JH z3>`{K&-Rn#e~Bd#RdZr88Da1HT?%o_A5F%82a(ZAg#AuxegYfY{=id5U$M3RHLhwr zQkCc7J7$yD_+pu(3!T&gS4y!ZK;7Lm>+wVn8)}n>v`eBtnYty|`bnse{$1Vozf&^b zp%Fg&($Vph6KFCyd0&g(BpcKZa^dG%y1;!|2Ti?9RuXG+Lt_g9l9k$wNDnGJRtxW5jE@cdw8ZaLTKH}*$sJYTME+E@# z=#6VwSHK_Wa8J{;q(MXbRtcrj|{|9V@5tZt9~k6KM^p z0CUlB83Uia6W7;Kc>%WExDtP=`cAT~8MXY+clOAhd^4r)L6=vYy@0v83}#X2hvzKCKbF8u?|XW` zsSUe7f3{xITh3@LoV!|=2ha)4rOx@wO?BSL2P+dXNQ#G~4F zusriYYX^j7U-jX~sV7z8KK^klv(SF(h#=0+UE+IELGaD-qt#!sv!=E~Kp0Qn01Xr6 zX@r<=gV%w}ZiDP;hCJH+Jrk&x2no*^^izasiA0Np>?wQCebHCCQdHj`kN*h61ra># zy4QXpg2DcP&qpR0a{?F4yqq+t7ECxXO~d-ZYIRrlduc30tdIUY*hCU-s(kSwBKIo9 zwk2=|@Zun!pkRAX?kSdM4#9lXX)S@<96@T`S>r?_V7#xwqAq>AFJP|mlT96sOPz+| z$YG-k^&~gnmQ}XQ`@jXbZ+fYh?YOXxCz{JF!c0VFl74@tvi5?xaLUDRytl&E8h6G8 zt=4w2PCTuzHla-?#OL0I2bi?hhGHt=$>hk$DErY~s_bO2Jc85z)x*6keA>cS=Mfdz zCE>PF&j73Unq?%~I1Z&v(ju-#3Invgv;Y}_*=Gp|(%$}`tv-`Cz;%wS@e5{s`eW8^I3uk1$SoNv=j$NrrTt-zpA@^WY9Y*WN9lhUX9dl_*gHGEHmqbLBBgDKcUlp{|yWqDgp(L=BwCx(_W zFmm)PbrZ^%`B6?S=x!&CO&8-m@lS0*BG2o)?(ns#1`4`p;gS)N>t_=Ng#x<>{9!8Y 
zG$oIMB+py~dvBjTM*mnR?Ua=vOGvERYmr6!(4Cfo&9CXUTEhCvCLQlbMK1~MJA4Vf z%38u#zes~Ww0Fb;uz8|ad>@1i$&FNh1Qh!Oz#{xE74)uf0`^E_vKLgN;Kvo8NP2Om z^?ICemaDP|{YWy){CyWX(U=z1Ns|}QFLlHT? z?Jbwh|~zrDD`FFGN``Y%zqe!l$|Q4kOUgZ@0^hy3?RIOzK6 z{r?}sHi{Xt^9K_2?7D|OXq3iO#n)#F;CH4ZI?t&Td!&#?1P@@%!=Cr9G;$sxT6~r^ z^sG(I;d@3Skh{uFy?PrO?M=Yl4r^h?NBSP#>rzt`nqIAaQMh-MNO>Wr**glP*Tr~ZCXcoZ)&HzOPB8@XQA6n zLz4vereNX)oA?Bd=CH?yOfw7YL`zOa(m_BvVObd|495*j@wmkeJ@`gQ_4g>mMbM zXIivL^p4$^P``};y;lwRgYT} zx3oGaO`WuBqCA02TPwK-IJsN8fNMZ)7v9|v6LPUuVZ1Wo|7PzdCx4}nk6++V_CVM4 zZK33odtkL7j7qM=Lr8J+_=_-n>krJqF@;x z=z1Fev+@dEBeMU!3i1g8uD}1*`TnE)yZ?7fQ1G8;6cV^5d;jlM0D|(w4+IDZqR8O& z7IHnM>wOf@13^$<)IKWV*ZUBD@Qp4AP0Dd6orth~W0n`|7NFa*n zA++t_*YCpsuK#`hnZN&D&ezD%@g`o%aI@=ZU5{P!Z>H#cO(6dP_CWuHmKsQenX@VC z9UN9uC>)6ZfVe?|+&}=UrIVArqc9JOj-pfxB-Gv#ZtBR5Kw7Z0;)4;18ufItEuelQ0R%mM_m{nP3HVA(&UR2qu91rQcD1HYNVU!3%R=&BnM zW{!uVr2KfmzZZa@fB>HWz#Q~32tq;Re`_E? zepH12t$_uBsM7Fn4FrLp;`nb(2rP)YO#inAK_&O!8ki52`u}Lxm+8OlAgKH2KN>IJ zKgQyOgraV8 Date: Mon, 22 Jul 2024 16:10:20 +0800 Subject: [PATCH 30/30] Refine wording --- concurrency-primer.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/concurrency-primer.tex b/concurrency-primer.tex index 292a72c..37aee7f 100644 --- a/concurrency-primer.tex +++ b/concurrency-primer.tex @@ -690,7 +690,7 @@ \section{Concurrency tools and synchronization mechanisms} Consequently, we should not focus on comparing which communication tools or synchronization mechanisms are better, but rather on exploring how to effectively use these tools in a given scenario to facilitate smooth communication between threads and achieve the programmer's goals. -\section{Lock free} +\section{Lock-free} In \secref{concurrency-tool}, we explored different mechanisms based on the characteristics of concurrency tools, as described in \secref{atomicity} and \secref{rmw}. In this section, we need to explore which strategies can help programmers to design a concurrency program