|
| 1 | +\documentclass{beamer} |
| 2 | + |
| 3 | +% Theme choice |
| 4 | +\usetheme{Madrid} |
| 5 | + |
| 6 | +% Optional packages |
| 7 | +\usepackage{graphicx} % For including images |
| 8 | +\usepackage{amsmath} % For math symbols and formulas |
| 9 | +\usepackage{hyperref} % For hyperlinks |
| 10 | +\usepackage{tikz} % For charts |
| 11 | +\usepackage{listings} |
| 12 | +\usepackage{xcolor} |
| 13 | +\usepackage[T1]{fontenc} |
| 14 | + |
| 15 | +\lstdefinestyle{CStyle}{ |
| 16 | + language=C, % Set the language to C |
| 17 | + basicstyle=\ttfamily\footnotesize\linespread{0.9}\tiny, % Set font style and size |
| 18 | + keywordstyle=\color{blue}, % Color of keywords |
| 19 | + commentstyle=\color{gray}, % Color of comments |
| 20 | + stringstyle=\color{red}, % Color of strings |
| 21 | + showstringspaces=false, % Do not mark spaces in strings |
| 22 | + breaklines=true, % Enable line breaks at appropriate places |
| 23 | + breakatwhitespace=false, % Break lines at any character, not just whitespace |
| 24 | + numbers=left, % Show line numbers on the left |
| 25 | + numberstyle=\tiny\color{gray}, % Style for line numbers |
| 26 | + tabsize=4, % Set tab width |
| 27 | + keepspaces=true, % Keep indentation spaces |
| 28 | + frame=single, % Add a border around the code |
| 29 | + aboveskip=0pt, % Reduce space above the code block |
| 30 | + belowskip=0pt, % Reduce space below the code block |
| 31 | + xleftmargin=7.5pt, % Add left padding (approx. 2.8mm or 10px) |
| 32 | + xrightmargin=15pt, % Add right padding (approx. 5.3mm or 20px) |
| 33 | +} |
| 34 | + |
| 35 | +% Title, author, date, and institute (optional) |
| 36 | +\title[Parallel Programming Course. OpenMP.]{Parallel Programming Course. \\OpenMP.} |
| 37 | +\author{Obolenskiy Arseniy, Nesterov Alexander} |
| 38 | +\institute{Nizhny Novgorod State University} |
| 39 | + |
| 40 | +\date{\today} % or \date{Month Day, Year} |
| 41 | + |
| 42 | +% Redefine the footline to display both the short title and the university name |
| 43 | +\setbeamertemplate{footline}{ |
| 44 | + \leavevmode% |
| 45 | + \hbox{% |
| 46 | + \begin{beamercolorbox}[wd=.45\paperwidth,ht=2.5ex,dp=1ex,leftskip=1em,center]{author in head/foot}% |
| 47 | + \usebeamerfont{author in head/foot}\insertshortinstitute % Displays the university name |
| 48 | + \end{beamercolorbox}% |
| 49 | + \begin{beamercolorbox}[wd=.45\paperwidth,ht=2.5ex,dp=1ex,leftskip=1em,center]{author in head/foot}% |
| 50 | + \usebeamerfont{author in head/foot}\insertshorttitle % Displays the short title |
| 51 | + \end{beamercolorbox}% |
| 52 | + \begin{beamercolorbox}[wd=.1\paperwidth,ht=2.5ex,dp=1ex,rightskip=1em,center]{author in head/foot}% |
| 53 | + \usebeamerfont{author in head/foot}\insertframenumber{} / \inserttotalframenumber |
| 54 | + \end{beamercolorbox}}% |
| 55 | + \vskip0pt% |
| 56 | +} |
| 57 | + |
| 58 | +\begin{document} |
| 59 | + |
| 60 | +% Title slide |
| 61 | +\begin{frame} |
| 62 | + \titlepage |
| 63 | +\end{frame} |
| 64 | + |
| 65 | +\begin{frame}{Today} |
| 66 | + \tableofcontents |
| 67 | +\end{frame} |
| 68 | + |
| 69 | +\section{Introduction to OpenMP} |
| 70 | +\begin{frame}{What is OpenMP?} |
| 71 | + \begin{itemize} |
| 72 | + \item Brief overview |
| 73 | + \item Importance in parallel computing |
| 74 | + \item Use cases |
| 75 | + \end{itemize} |
| 76 | +\end{frame} |
| 77 | + |
| 78 | +\begin{frame}{What is OpenMP?} |
| 79 | + \begin{itemize} |
| 80 | + \item Open standard for parallel programming |
| 81 | + \item Supports multi-platform shared-memory multiprocessing |
| 82 | + \item Used in computational science, engineering, and simulations |
| 83 | + \end{itemize} |
| 84 | +\end{frame} |
| 85 | + |
| 86 | +\section{Hello World} |
| 87 | +\begin{frame}[fragile]{Your First OpenMP Program} |
| 88 | + \lstset{style=CStyle} |
| 89 | + \begin{lstlisting} |
| 90 | +#include <omp.h> |
| 91 | +#include <stdio.h> |
| 92 | + |
| 93 | +int main() { |
| 94 | + #pragma omp parallel |
| 95 | + { |
| 96 | + printf("Hello from thread %d\n", omp_get_thread_num()); |
| 97 | + } |
| 98 | + return 0; |
| 99 | +} |
| 100 | + \end{lstlisting} |
| 101 | +\end{frame} |
| 102 | + |
| 103 | +\section{Basic OpenMP Features} |
| 104 | +\begin{frame}{OpenMP Features} |
| 105 | + \begin{itemize} |
| 106 | + \item Compiler directives |
| 107 | + \item Runtime library functions |
| 108 | + \item Environment variables |
| 109 | + \end{itemize} |
| 110 | +\end{frame} |
| 111 | + |
| 112 | +\begin{frame}{Basic OpenMP Features} |
| 113 | + OpenMP provides three primary mechanisms to express parallelism clearly and effectively: |
| 114 | + |
| 115 | + \begin{itemize} |
| 116 | + \item Compiler directives (\texttt{\#pragma omp}) |
| 117 | + \item Runtime library functions |
| 118 | + \item Environment variables |
| 119 | + \end{itemize} |
| 120 | +\end{frame} |
| 121 | + |
| 122 | +\section{Compiler Directives and Clauses} |
| 123 | +\begin{frame}[fragile]{Compiler Directives} |
| 124 | + Compiler directives guide the compiler to parallelize sections of code. |
| 125 | + |
| 126 | + General syntax: |
| 127 | + \begin{verbatim} |
| 128 | + #pragma omp directive [clauses] |
| 129 | + \end{verbatim} |
| 130 | + |
| 131 | + Commonly used directives: |
| 132 | + \begin{itemize} |
| 133 | + \item \texttt{parallel} — Creates parallel threads. |
| 134 | + \item \texttt{for} — Parallelizes loop iterations. |
| 135 | + \item \texttt{sections} — Defines parallel sections (each one marked with \texttt{section}). |
| 136 | + \end{itemize} |
| 137 | + |
| 138 | + Example: |
| 139 | + \lstset{style=CStyle} |
| 140 | + \begin{lstlisting} |
| 141 | +#pragma omp parallel for |
| 142 | +for(int i = 0; i < N; i++) { |
| 143 | + a[i] = b[i] + c[i]; |
| 144 | +} |
| 145 | + \end{lstlisting} |
| 146 | +\end{frame} |
| 147 | + |
| 148 | +\begin{frame}[fragile]{The \texttt{parallel} Directive} |
| 149 | + The \texttt{parallel} directive starts a parallel region executed by multiple threads. |
| 150 | + |
| 151 | + Syntax: |
| 152 | + \begin{verbatim} |
| 153 | + #pragma omp parallel [clauses] |
| 154 | + { |
| 155 | + // Code executed in parallel |
| 156 | + } |
| 157 | + \end{verbatim} |
| 158 | + |
| 159 | + Example: |
| 160 | + \lstset{style=CStyle} |
| 161 | + \begin{lstlisting} |
| 162 | +#pragma omp parallel |
| 163 | +{ |
| 164 | + printf("Thread %d is running\n", omp_get_thread_num()); |
| 165 | +} |
| 166 | + \end{lstlisting} |
| 167 | +\end{frame} |
| 168 | + |
| 169 | +\begin{frame}[fragile]{The \texttt{for} Directive} |
| 170 | + The \texttt{for} directive parallelizes loops among threads. |
| 171 | + |
| 172 | + To take effect, it must be placed within an existing parallel region. |
| 173 | + |
| 174 | + Syntax: |
| 175 | + \begin{verbatim} |
| 176 | + #pragma omp for [clauses] |
| 177 | + for (init; condition; increment) { |
| 178 | + // Loop body |
| 179 | + } |
| 180 | + \end{verbatim} |
| 181 | + |
| 182 | + Example: |
| 183 | + \lstset{style=CStyle} |
| 184 | + \begin{lstlisting} |
| 185 | +#pragma omp parallel |
| 186 | +{ |
| 187 | + #pragma omp for |
| 188 | + for (int i = 0; i < N; i++) { |
| 189 | + array[i] = compute(i); |
| 190 | + } |
| 191 | +} |
| 192 | + \end{lstlisting} |
| 193 | +\end{frame} |
| 194 | + |
| 195 | +\begin{frame}[fragile]{The \texttt{parallel for} Directive} |
| 196 | + Combines the \texttt{parallel} and \texttt{for} directives, simplifying syntax. |
| 197 | + |
| 198 | + Syntax: |
| 199 | + \begin{verbatim} |
| 200 | + #pragma omp parallel for [clauses] |
| 201 | + for (init; condition; increment) { |
| 202 | + // Loop body |
| 203 | + } |
| 204 | + \end{verbatim} |
| 205 | + |
| 206 | + Example: |
| 207 | + \lstset{style=CStyle} |
| 208 | + \begin{lstlisting} |
| 209 | +#pragma omp parallel for |
| 210 | +for (int i = 0; i < N; i++) { |
| 211 | + data[i] = process(i); |
| 212 | +} |
| 213 | + \end{lstlisting} |
| 214 | + |
| 215 | + This is equivalent to a \texttt{parallel} region with a single \texttt{for} loop. |
| 216 | +\end{frame} |
| 217 | + |
| 218 | +\begin{frame}[fragile]{Clauses: \texttt{private} and \texttt{shared}} |
| 219 | + Applicable to directives: |
| 220 | + \begin{itemize} |
| 221 | + \item \texttt{parallel}, \texttt{for}, \texttt{parallel for} |
| 222 | + \end{itemize} |
| 223 | + |
| 224 | + Controls the scope of variables: |
| 225 | + |
| 226 | + \begin{itemize} |
| 227 | + \item \texttt{shared(var)}: Variable shared among threads (default). |
| 228 | + \item \texttt{private(var)}: Each thread gets its own private copy. |
| 229 | + \end{itemize} |
| 230 | + |
| 231 | + Example (\texttt{parallel for}): |
| 232 | + \lstset{style=CStyle} |
| 233 | + \begin{lstlisting} |
| 234 | +int temp = 0; |
| 235 | +#pragma omp parallel for private(temp) |
| 236 | +for(int i = 0; i < N; i++) { |
| 237 | + temp = compute(i); |
| 238 | + result[i] = temp; |
| 239 | +} |
| 240 | + \end{lstlisting} |
| 241 | +\end{frame} |
| 242 | + |
| 243 | +\begin{frame}[fragile]{Clause: \texttt{schedule}} |
| 244 | + Applicable to directives: |
| 245 | + \begin{itemize} |
| 246 | + \item \texttt{for}, \texttt{parallel for} |
| 247 | + \end{itemize} |
| 248 | + |
| 249 | + Controls iteration distribution among threads: |
| 250 | + |
| 251 | + Syntax: |
| 252 | + \begin{verbatim} |
| 253 | + schedule(type, chunk_size) |
| 254 | + \end{verbatim} |
| 255 | + |
| 256 | + Types: |
| 257 | + \begin{itemize} |
| 258 | + \item \texttt{static} (default in most implementations) |
| 259 | + \item \texttt{dynamic} |
| 260 | + \item \texttt{guided} |
| 261 | + \end{itemize} |
| 262 | + |
| 263 | + Example (\texttt{parallel for}): |
| 264 | + \lstset{style=CStyle} |
| 265 | + \begin{lstlisting} |
| 266 | +#pragma omp parallel for schedule(dynamic,4) |
| 267 | +for(int i = 0; i < N; i++) { |
| 268 | + heavy_computation(i); |
| 269 | +} |
| 270 | + \end{lstlisting} |
| 271 | +\end{frame} |
| 272 | + |
| 273 | +\begin{frame}[fragile]{Clause: \texttt{reduction}} |
| 274 | + Applicable to directives: |
| 275 | + \begin{itemize} |
| 276 | + \item \texttt{parallel}, \texttt{for}, \texttt{parallel for} |
| 277 | + \end{itemize} |
| 278 | + |
| 279 | + Combines thread results safely into one variable. |
| 280 | + |
| 281 | + Syntax: |
| 282 | + \begin{verbatim} |
| 283 | + reduction(operator: variable) |
| 284 | + \end{verbatim} |
| 285 | + |
| 286 | + Common operators: \texttt{+, -, *, max, min} |
| 287 | + |
| 288 | + Example (\texttt{parallel for}): |
| 289 | + \lstset{style=CStyle} |
| 290 | + \begin{lstlisting} |
| 291 | +int total = 0; |
| 292 | +#pragma omp parallel for reduction(+:total) |
| 293 | +for(int i = 0; i < N; i++) { |
| 294 | + total += array[i]; |
| 295 | +} |
| 296 | +printf("Sum = %d\n", total); |
| 297 | + \end{lstlisting} |
| 298 | +\end{frame} |
| 299 | + |
| 300 | +\begin{frame}[fragile]{Clause: \texttt{num\_threads}} |
| 301 | + Applicable to directives: |
| 302 | + \begin{itemize} |
| 303 | + \item \texttt{parallel}, \texttt{parallel for} |
| 304 | + \end{itemize} |
| 305 | + |
| 306 | + Sets number of threads explicitly: |
| 307 | + |
| 308 | + Syntax: |
| 309 | + \begin{verbatim} |
| 310 | + num_threads(number_of_threads) |
| 311 | + \end{verbatim} |
| 312 | + |
| 313 | + Example (\texttt{parallel for}): |
| 314 | + \lstset{style=CStyle} |
| 315 | + \begin{lstlisting} |
| 316 | +#pragma omp parallel for num_threads(8) |
| 317 | +for(int i = 0; i < N; i++) { |
| 318 | + compute(i); |
| 319 | +} |
| 320 | + \end{lstlisting} |
| 321 | + |
| 322 | + Overrides default thread count and environment settings. |
| 323 | +\end{frame} |
| 324 | + |
| 325 | + |
| 326 | +\begin{frame}[fragile]{Sections} |
| 327 | + Use the \texttt{sections} directive to run independent tasks in parallel: |
| 328 | + |
| 329 | + \lstset{style=CStyle} |
| 330 | + \begin{lstlisting} |
| 331 | +#pragma omp parallel sections |
| 332 | +{ |
| 333 | + #pragma omp section |
| 334 | + { |
| 335 | + compute_task_A(); |
| 336 | + } |
| 337 | + #pragma omp section |
| 338 | + { |
| 339 | + compute_task_B(); |
| 340 | + } |
| 341 | + #pragma omp section |
| 342 | + { |
| 343 | + compute_task_C(); |
| 344 | + } |
| 345 | +} |
| 346 | + \end{lstlisting} |
| 347 | +\end{frame} |
| 348 | + |
| 349 | +\section{Synchronization and Data Sharing} |
| 350 | +\begin{frame}{Synchronization and Data Sharing} |
| 351 | + \begin{itemize} |
| 352 | + \item Barrier |
| 353 | + \item Critical sections |
| 354 | + \item Atomic operations |
| 355 | + \item Built-in reduction operation |
| 356 | + \item OpenMP locks (similar to mutex) |
| 357 | + \end{itemize} |
| 358 | +\end{frame} |
| 359 | + |
| 360 | +\begin{frame}[fragile]{OpenMP Barrier (\texttt{barrier})} |
| 361 | + Synchronizes threads explicitly; threads wait at the barrier until all threads arrive. |
| 362 | + |
| 363 | + Syntax: |
| 364 | + \begin{verbatim} |
| 365 | + #pragma omp barrier |
| 366 | + \end{verbatim} |
| 367 | + |
| 368 | + Example: |
| 369 | + \lstset{style=CStyle} |
| 370 | + \begin{lstlisting} |
| 371 | + #pragma omp parallel |
| 372 | + { |
| 373 | + compute_part1(); |
| 374 | + |
| 375 | + #pragma omp barrier // All threads wait here |
| 376 | + |
| 377 | + compute_part2(); // Starts only after all threads |
| 378 | + // finish compute_part1() |
| 379 | + } |
| 380 | + \end{lstlisting} |
| 381 | + |
| 382 | + Barrier ensures correct sequence in parallel regions. |
| 383 | +\end{frame} |
| 384 | + |
| 385 | +\begin{frame}[fragile]{Critical Sections (\texttt{critical})} |
| 386 | + \textbf{Purpose:} |
| 387 | + Ensures only one thread executes a code region at a time, preventing race conditions. |
| 388 | + |
| 389 | + Syntax: |
| 390 | + \begin{verbatim} |
| 391 | + #pragma omp critical [name] |
| 392 | + { |
| 393 | + // critical section |
| 394 | + } |
| 395 | + \end{verbatim} |
| 396 | + |
| 397 | + Example: |
| 398 | + \lstset{style=CStyle} |
| 399 | + \begin{lstlisting} |
| 400 | +#pragma omp parallel |
| 401 | +{ |
| 402 | + #pragma omp critical |
| 403 | + { |
| 404 | + sum += compute_value(); |
| 405 | + } |
| 406 | +} |
| 407 | + \end{lstlisting} |
| 408 | + |
| 409 | + Using this directive ensures safe access to shared variables inside the block. |
| 410 | +\end{frame} |
| 411 | + |
| 412 | +\begin{frame}[fragile]{Named Critical Sections} |
| 413 | + Multiple named critical sections prevent unnecessary waiting. |
| 414 | + |
| 415 | + \textbf{Syntax:} |
| 416 | + \begin{verbatim} |
| 417 | + #pragma omp critical(name) |
| 418 | + { |
| 419 | + // named critical section |
| 420 | + } |
| 421 | + \end{verbatim} |
| 422 | + |
| 423 | + Example: |
| 424 | + \lstset{style=CStyle} |
| 425 | + \begin{lstlisting} |
| 426 | +#pragma omp parallel |
| 427 | +{ |
| 428 | + #pragma omp critical(update_sum) |
| 429 | + { |
| 430 | + sum += compute_sum(); |
| 431 | + } |
| 432 | + |
| 433 | + #pragma omp critical(update_max) |
| 434 | + { |
| 435 | + max_val = max(max_val, compute_val()); |
| 436 | + } |
| 437 | +} |
| 438 | + \end{lstlisting} |
| 439 | + |
| 440 | + Different named critical regions do not block each other. |
| 441 | +\end{frame} |
| 442 | + |
| 443 | +\begin{frame}[fragile]{Atomic Operations (\texttt{atomic})} |
| 444 | + Purpose: |
| 445 | + Enforces atomicity of a single memory operation. |
| 446 | + |
| 447 | + Syntax: |
| 448 | + \begin{verbatim} |
| 449 | + #pragma omp atomic |
| 450 | + expression; |
| 451 | + \end{verbatim} |
| 452 | + |
| 453 | + Supported operations: \texttt{+, -, *, /, \&, |, \textasciicircum{}, ++, --} |
| 454 | + |
| 455 | + Example: |
| 456 | + \lstset{style=CStyle} |
| 457 | + \begin{lstlisting} |
| 458 | +#pragma omp parallel for |
| 459 | +for(int i = 0; i < N; i++) { |
| 460 | + #pragma omp atomic |
| 461 | + count += array[i]; |
| 462 | +} |
| 463 | + \end{lstlisting} |
| 464 | + |
| 465 | + It is more efficient than \texttt{critical} for simple arithmetic. |
| 466 | +\end{frame} |
| 467 | + |
| 468 | +\begin{frame}{\texttt{critical} vs. \texttt{atomic}} |
| 469 | + Key differences between these synchronization methods: |
| 470 | + |
| 471 | + \begin{itemize} |
| 472 | + \item Critical Sections: |
| 473 | + \begin{itemize} |
| 474 | + \item Allows arbitrary blocks of code. |
| 475 | + \item More general-purpose, but potentially slower due to locking overhead. |
| 476 | + \end{itemize} |
| 477 | + |
| 478 | + \item Atomic Operations: |
| 479 | + \begin{itemize} |
| 480 | + \item Limited to single, simple memory operations. |
| 481 | + \item Faster, uses hardware-level instructions. |
| 482 | + \end{itemize} |
| 483 | + \end{itemize} |
| 484 | + |
| 485 | + Use \texttt{atomic} for simple operations, \texttt{critical} for more complex sections. |
| 486 | +\end{frame} |
| 487 | + |
| 488 | +\section{OpenMP functions} |
| 489 | +\begin{frame}[fragile]{OpenMP Functions: Thread Management} |
| 490 | + Control and query the number of threads. |
| 491 | + |
| 492 | + Commonly used functions: |
| 493 | + \begin{itemize} |
| 494 | + \item \texttt{omp\_set\_num\_threads(int n)} |
| 495 | + \item \texttt{omp\_get\_num\_threads()} |
| 496 | + \item \texttt{omp\_get\_thread\_num()} |
| 497 | + \item \texttt{omp\_get\_max\_threads()} |
| 498 | + \end{itemize} |
| 499 | + |
| 500 | + Example: |
| 501 | + \lstset{style=CStyle} |
| 502 | + \begin{lstlisting} |
| 503 | +omp_set_num_threads(4); |
| 504 | +#pragma omp parallel |
| 505 | +{ |
| 506 | + int tid = omp_get_thread_num(); |
| 507 | + printf("Hello from thread %d\n", tid); |
| 508 | +} |
| 509 | + \end{lstlisting} |
| 510 | +\end{frame} |
| 511 | + |
| 512 | +\begin{frame}[fragile]{OpenMP Locks} |
| 513 | + Purpose: |
| 514 | + Provide explicit, fine-grained control of synchronization for critical regions. |
| 515 | + |
| 516 | + API: |
| 517 | + \begin{itemize} |
| 518 | + \item \texttt{omp\_init\_lock()} — Initializes a lock |
| 519 | + \item \texttt{omp\_set\_lock()} — Locks (blocks if unavailable) |
| 520 | + \item \texttt{omp\_unset\_lock()} — Releases a lock |
| 521 | + \item \texttt{omp\_destroy\_lock()} — Frees lock resources |
| 522 | + \end{itemize} |
| 523 | + |
| 524 | + Example: |
| 525 | + \lstset{style=CStyle} |
| 526 | + \begin{lstlisting} |
| 527 | +omp_lock_t lock; |
| 528 | +omp_init_lock(&lock); |
| 529 | + |
| 530 | +#pragma omp parallel for |
| 531 | +for(int i = 0; i < N; i++) { |
| 532 | + omp_set_lock(&lock); |
| 533 | + sum += compute(i); |
| 534 | + omp_unset_lock(&lock); |
| 535 | +} |
| 536 | + |
| 537 | +omp_destroy_lock(&lock); |
| 538 | + \end{lstlisting} |
| 539 | + |
| 540 | + Explicit locking provides precise synchronization control. |
| 541 | +\end{frame} |
| 542 | + |
| 543 | +\begin{frame}[fragile]{OpenMP Functions: Timing} |
| 544 | + Useful functions for measuring execution time: |
| 545 | + |
| 546 | + \begin{itemize} |
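| 547 | + \item \texttt{omp\_get\_wtime()} — returns elapsed wall-clock time in seconds, measured from an arbitrary fixed point. |
| 548 | + \item \texttt{omp\_get\_wtick()} — returns the timer precision (seconds between successive clock ticks). |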
| 549 | + \end{itemize} |
| 550 | + |
| 551 | + Example: |
| 552 | + \lstset{style=CStyle} |
| 553 | + \begin{lstlisting} |
| 554 | +double start = omp_get_wtime(); |
| 555 | + |
| 556 | +#pragma omp parallel for |
| 557 | +for(int i = 0; i < N; i++) { |
| 558 | + heavy_computation(i); |
| 559 | +} |
| 560 | + |
| 561 | +double end = omp_get_wtime(); |
| 562 | +printf("Elapsed time: %f seconds\n", end-start); |
| 563 | + \end{lstlisting} |
| 564 | +\end{frame} |
| 565 | + |
| 566 | +\section{Environment variables} |
| 567 | + |
| 568 | +\begin{frame}{Environment Variables in OpenMP} |
| 569 | +Environment variables control OpenMP runtime behavior without recompilation. |
| 570 | + |
| 571 | +Common environment variables include: |
| 572 | + \begin{itemize} |
| 573 | + \item \texttt{OMP\_NUM\_THREADS} |
| 574 | + \item \texttt{OMP\_SCHEDULE} |
| 575 | + \item \texttt{OMP\_DYNAMIC} |
| 576 | + \item \texttt{OMP\_NESTED} |
| 577 | + \end{itemize} |
| 578 | +\end{frame} |
| 579 | + |
| 580 | +\begin{frame}[fragile]{\texttt{OMP\_NUM\_THREADS}} |
| 581 | + Specifies the default number of threads. |
| 582 | + |
| 583 | + Example usage: |
| 584 | + \begin{verbatim} |
| 585 | + export OMP_NUM_THREADS=8 |
| 586 | + ./my_program |
| 587 | + \end{verbatim} |
| 588 | + |
| 589 | + Overrides the implementation's default thread count; it is itself overridden by \texttt{omp\_set\_num\_threads()} calls and by the \texttt{num\_threads} clause. |
| 590 | +\end{frame} |
| 591 | + |
| 592 | +\begin{frame}[fragile]{\texttt{OMP\_SCHEDULE}} |
| 593 | + Sets default scheduling policy for loops with the \texttt{schedule(runtime)} clause. |
| 594 | + |
| 595 | + Syntax: |
| 596 | + \begin{verbatim} |
| 597 | + export OMP_SCHEDULE="type,chunk" |
| 598 | + \end{verbatim} |
| 599 | + |
| 600 | + Example: |
| 601 | + \begin{verbatim} |
| 602 | + export OMP_SCHEDULE="dynamic,4" |
| 603 | + ./my_program |
| 604 | + \end{verbatim} |
| 605 | + |
| 606 | + Affects loops declared as: |
| 607 | + \begin{verbatim} |
| 608 | + #pragma omp parallel for schedule(runtime) |
| 609 | + \end{verbatim} |
| 610 | +\end{frame} |
| 611 | + |
| 612 | +\begin{frame}[fragile]{\texttt{OMP\_DYNAMIC} and \texttt{OMP\_NESTED}} |
| 613 | + \texttt{OMP\_DYNAMIC}: enables dynamic adjustment of the number of threads (true/false). |
| 614 | + |
| 615 | + Example: |
| 616 | + \begin{verbatim} |
| 617 | + export OMP_DYNAMIC=true |
| 618 | + \end{verbatim} |
| 619 | +\end{frame} |
| 620 | + |
| 621 | + |
| 622 | +\begin{frame}[fragile]{\texttt{OMP\_DYNAMIC} and \texttt{OMP\_NESTED}} |
| 623 | + \texttt{OMP\_NESTED}: allows nested parallelism (true/false). |
| 624 | + |
| 625 | + Example: |
| 626 | + \begin{verbatim} |
| 627 | + export OMP_NESTED=true |
| 628 | + \end{verbatim} |
| 629 | + |
| 630 | + Nested parallel regions: |
| 631 | + \lstset{style=CStyle} |
| 632 | + \begin{lstlisting} |
| 633 | +#pragma omp parallel num_threads(2) |
| 634 | +{ |
| 635 | + #pragma omp parallel num_threads(2) |
| 636 | + { |
| 637 | + // Nested region, total 4 threads |
| 638 | + } |
| 639 | +} |
| 640 | + \end{lstlisting} |
| 641 | +\end{frame} |
| 642 | + |
| 643 | +\begin{frame} |
| 644 | + \centering |
| 645 | + \Huge{Thank You!} |
| 646 | +\end{frame} |
| 647 | + |
| 648 | +\begin{frame}{References} |
| 649 | + \begin{itemize} |
| 650 | + \item OpenMP Official Specification: \url{https://www.openmp.org/specifications/} |
| 651 | + \item OpenMP Reference Guides: \url{https://www.openmp.org/resources/refguides/} |
| 652 | + \end{itemize} |
| 653 | +\end{frame} |
| 654 | + |
| 655 | +\end{document} |
0 commit comments