Skip to content

Commit 9cf4aed

Browse files
authoredMar 18, 2025··
Merge pull request learning-process#50 from aobolensk/07-openmp
2 parents 6b7488f + d7cc4cb commit 9cf4aed

File tree

2 files changed

+662
-0
lines changed

2 files changed

+662
-0
lines changed
 

‎07-openmp/07-openmp.tex

Lines changed: 655 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,655 @@
1+
\documentclass{beamer}
2+
3+
% Theme choice
4+
\usetheme{Madrid}
5+
6+
% Optional packages
7+
\usepackage{graphicx} % For including images
8+
\usepackage{amsmath} % For math symbols and formulas
9+
\usepackage{hyperref} % For hyperlinks
10+
\usepackage{tikz} % For charts
11+
\usepackage{listings}
12+
\usepackage{xcolor}
13+
\usepackage[T1]{fontenc}
14+
15+
\lstdefinestyle{CStyle}{
16+
language=C, % Set the language to C
17+
basicstyle=\ttfamily\footnotesize\linespread{0.9}\tiny, % Set font style and size
18+
keywordstyle=\color{blue}, % Color of keywords
19+
commentstyle=\color{gray}, % Color of comments
20+
stringstyle=\color{red}, % Color of strings
21+
showstringspaces=false, % Do not mark spaces in strings
22+
breaklines=true, % Enable line breaks at appropriate places
23+
breakatwhitespace=false, % Break lines at any character, not just whitespace
24+
numbers=left, % Show line numbers on the left
25+
numberstyle=\tiny\color{gray}, % Style for line numbers
26+
tabsize=4, % Set tab width
27+
keepspaces=true, % Keep indentation spaces
28+
frame=single, % Add a border around the code
29+
aboveskip=0pt, % Reduce space above the code block
30+
belowskip=0pt, % Reduce space below the code block
31+
xleftmargin=7.5pt, % Add left padding (approx. 2.8mm or 10px)
32+
xrightmargin=15pt, % Add right padding (approx. 5.3mm or 20px)
33+
}
34+
35+
% Title, author, date, and institute (optional)
36+
\title[Parallel Programming Course. OpenMP.]{Parallel Programming Course. \\OpenMP.}
37+
\author{Obolenskiy Arseniy, Nesterov Alexander}
38+
\institute{Nizhny Novgorod State University}
39+
40+
\date{\today} % or \date{Month Day, Year}
41+
42+
% Redefine the footline to display both the short title and the university name
43+
\setbeamertemplate{footline}{
44+
\leavevmode%
45+
\hbox{%
46+
\begin{beamercolorbox}[wd=.45\paperwidth,ht=2.5ex,dp=1ex,leftskip=1em,center]{author in head/foot}%
47+
\usebeamerfont{author in head/foot}\insertshortinstitute % Displays the university name
48+
\end{beamercolorbox}%
49+
\begin{beamercolorbox}[wd=.45\paperwidth,ht=2.5ex,dp=1ex,leftskip=1em,center]{author in head/foot}%
50+
\usebeamerfont{author in head/foot}\insertshorttitle % Displays the short title
51+
\end{beamercolorbox}%
52+
\begin{beamercolorbox}[wd=.1\paperwidth,ht=2.5ex,dp=1ex,rightskip=1em,center]{author in head/foot}%
53+
\usebeamerfont{author in head/foot}\insertframenumber{} / \inserttotalframenumber
54+
\end{beamercolorbox}}%
55+
\vskip0pt%
56+
}
57+
58+
\begin{document}
59+
60+
% Title slide
61+
\begin{frame}
62+
\titlepage
63+
\end{frame}
64+
65+
\begin{frame}{Today}
66+
\tableofcontents
67+
\end{frame}
68+
69+
\section{Introduction to OpenMP}
70+
\begin{frame}{What is OpenMP?}
71+
\begin{itemize}
72+
\item Brief overview
73+
\item Importance in parallel computing
74+
\item Use cases
75+
\end{itemize}
76+
\end{frame}
77+
78+
\begin{frame}{What is OpenMP?}
79+
\begin{itemize}
80+
\item Open standard for parallel programming
81+
\item Supports multi-platform shared-memory multiprocessing
82+
\item Used in computational science, engineering, and simulations
83+
\end{itemize}
84+
\end{frame}
85+
86+
\section{Hello World}
87+
\begin{frame}[fragile]{Your First OpenMP Program}
88+
\lstset{style=CStyle}
89+
\begin{lstlisting}
90+
#include <omp.h>
91+
#include <stdio.h>
92+
93+
int main() {
94+
#pragma omp parallel
95+
{
96+
printf("Hello from thread %d\n", omp_get_thread_num());
97+
}
98+
return 0;
99+
}
100+
\end{lstlisting}
101+
\end{frame}
102+
103+
\section{Basic OpenMP Features}
104+
\begin{frame}{OpenMP Features}
105+
\begin{itemize}
106+
\item Compiler directives
107+
\item Runtime library functions
108+
\item Environment variables
109+
\end{itemize}
110+
\end{frame}
111+
112+
\begin{frame}{Basic OpenMP Features}
113+
OpenMP provides three primary mechanisms to express parallelism clearly and effectively:
114+
115+
\begin{itemize}
116+
\item Compiler directives (\texttt{\#pragma omp})
117+
\item Runtime library functions
118+
\item Environment variables
119+
\end{itemize}
120+
\end{frame}
121+
122+
\section{Compiler Directives and Clauses}
123+
\begin{frame}[fragile]{Compiler Directives}
124+
Compiler directives guide the compiler to parallelize sections of code.
125+
126+
General syntax:
127+
\begin{verbatim}
128+
#pragma omp directive [clauses]
129+
\end{verbatim}
130+
131+
Commonly used directives:
132+
\begin{itemize}
133+
\item \texttt{parallel} — Creates parallel threads.
134+
\item \texttt{for} — Parallelizes loop iterations.
135+
\item \texttt{sections} — Defines independent blocks (each marked with \texttt{section}) that run in parallel.
136+
\end{itemize}
137+
138+
Example:
139+
\lstset{style=CStyle}
140+
\begin{lstlisting}
141+
#pragma omp parallel for
142+
for(int i = 0; i < N; i++) {
143+
a[i] = b[i] + c[i];
144+
}
145+
\end{lstlisting}
146+
\end{frame}
147+
148+
\begin{frame}[fragile]{The \texttt{parallel} Directive}
149+
The \texttt{parallel} directive starts a parallel region executed by multiple threads.
150+
151+
Syntax:
152+
\begin{verbatim}
153+
#pragma omp parallel [clauses]
154+
{
155+
// Code executed in parallel
156+
}
157+
\end{verbatim}
158+
159+
Example:
160+
\lstset{style=CStyle}
161+
\begin{lstlisting}
162+
#pragma omp parallel
163+
{
164+
printf("Thread %d is running\n", omp_get_thread_num());
165+
}
166+
\end{lstlisting}
167+
\end{frame}
168+
169+
\begin{frame}[fragile]{The \texttt{for} Directive}
170+
The \texttt{for} directive parallelizes loops among threads.
171+
172+
To take effect, it must be placed inside an existing parallel region.
173+
174+
Syntax:
175+
\begin{verbatim}
176+
#pragma omp for [clauses]
177+
for (init; condition; increment) {
178+
// Loop body
179+
}
180+
\end{verbatim}
181+
182+
Example:
183+
\lstset{style=CStyle}
184+
\begin{lstlisting}
185+
#pragma omp parallel
186+
{
187+
#pragma omp for
188+
for (int i = 0; i < N; i++) {
189+
array[i] = compute(i);
190+
}
191+
}
192+
\end{lstlisting}
193+
\end{frame}
194+
195+
\begin{frame}[fragile]{The \texttt{parallel for} Directive}
196+
Combines the \texttt{parallel} and \texttt{for} directives, simplifying syntax.
197+
198+
Syntax:
199+
\begin{verbatim}
200+
#pragma omp parallel for [clauses]
201+
for (init; condition; increment) {
202+
// Loop body
203+
}
204+
\end{verbatim}
205+
206+
Example:
207+
\lstset{style=CStyle}
208+
\begin{lstlisting}
209+
#pragma omp parallel for
210+
for (int i = 0; i < N; i++) {
211+
data[i] = process(i);
212+
}
213+
\end{lstlisting}
214+
215+
This is equivalent to a \texttt{parallel} region that contains only a single \texttt{for} construct.
216+
\end{frame}
217+
218+
\begin{frame}[fragile]{Clauses: \texttt{private} and \texttt{shared}}
219+
Applicable to directives:
220+
\begin{itemize}
221+
\item \texttt{parallel}, \texttt{for}, \texttt{parallel for} (\texttt{shared} is not accepted on a standalone \texttt{for})
222+
\end{itemize}
223+
224+
Controls the scope of variables:
225+
226+
\begin{itemize}
227+
\item \texttt{shared(var)}: Variable shared among threads (default).
228+
\item \texttt{private(var)}: Each thread gets its own private copy.
229+
\end{itemize}
230+
231+
Example (\texttt{parallel for}):
232+
\lstset{style=CStyle}
233+
\begin{lstlisting}
234+
int temp = 0;
235+
#pragma omp parallel for private(temp)
236+
for(int i = 0; i < N; i++) {
237+
temp = compute(i);
238+
result[i] = temp;
239+
}
240+
\end{lstlisting}
241+
\end{frame}
242+
243+
\begin{frame}[fragile]{Clause: \texttt{schedule}}
244+
Applicable to directives:
245+
\begin{itemize}
246+
\item \texttt{for}, \texttt{parallel for}
247+
\end{itemize}
248+
249+
Controls iteration distribution among threads:
250+
251+
Syntax:
252+
\begin{verbatim}
253+
schedule(type, chunk_size)
254+
\end{verbatim}
255+
256+
Types:
257+
\begin{itemize}
258+
\item \texttt{static} (the usual default; the actual default is implementation-defined)
259+
\item \texttt{dynamic}
260+
\item \texttt{guided}
261+
\end{itemize}
262+
263+
Example (\texttt{parallel for}):
264+
\lstset{style=CStyle}
265+
\begin{lstlisting}
266+
#pragma omp parallel for schedule(dynamic,4)
267+
for(int i = 0; i < N; i++) {
268+
heavy_computation(i);
269+
}
270+
\end{lstlisting}
271+
\end{frame}
272+
273+
\begin{frame}[fragile]{Clause: \texttt{reduction}}
274+
Applicable to directives:
275+
\begin{itemize}
276+
\item \texttt{parallel}, \texttt{for}, \texttt{parallel for}
277+
\end{itemize}
278+
279+
Combines thread results safely into one variable.
280+
281+
Syntax:
282+
\begin{verbatim}
283+
reduction(operator: variable)
284+
\end{verbatim}
285+
286+
Common operators: \texttt{+, -, *, max, min}
287+
288+
Example (\texttt{parallel for}):
289+
\lstset{style=CStyle}
290+
\begin{lstlisting}
291+
int total = 0;
292+
#pragma omp parallel for reduction(+:total)
293+
for(int i = 0; i < N; i++) {
294+
total += array[i];
295+
}
296+
printf("Sum = %d\n", total);
297+
\end{lstlisting}
298+
\end{frame}
299+
300+
\begin{frame}[fragile]{Clause: \texttt{num\_threads}}
301+
Applicable to directives:
302+
\begin{itemize}
303+
\item \texttt{parallel}, \texttt{parallel for}
304+
\end{itemize}
305+
306+
Sets number of threads explicitly:
307+
308+
Syntax:
309+
\begin{verbatim}
310+
num_threads(number_of_threads)
311+
\end{verbatim}
312+
313+
Example (\texttt{parallel for}):
314+
\lstset{style=CStyle}
315+
\begin{lstlisting}
316+
#pragma omp parallel for num_threads(8)
317+
for(int i = 0; i < N; i++) {
318+
compute(i);
319+
}
320+
\end{lstlisting}
321+
322+
Overrides default thread count and environment settings.
323+
\end{frame}
324+
325+
326+
\begin{frame}[fragile]{Sections}
327+
Use the \texttt{sections} directive to run independent tasks in parallel:
328+
329+
\lstset{style=CStyle}
330+
\begin{lstlisting}
331+
#pragma omp parallel sections
332+
{
333+
#pragma omp section
334+
{
335+
compute_task_A();
336+
}
337+
#pragma omp section
338+
{
339+
compute_task_B();
340+
}
341+
#pragma omp section
342+
{
343+
compute_task_C();
344+
}
345+
}
346+
\end{lstlisting}
347+
\end{frame}
348+
349+
\section{Synchronization and Data Sharing}
350+
\begin{frame}{Synchronization and Data Sharing}
351+
\begin{itemize}
352+
\item Barrier
353+
\item Critical sections
354+
\item Atomic operations
355+
\item Built-in reduction operation
356+
\item OpenMP locks (similar to mutex)
357+
\end{itemize}
358+
\end{frame}
359+
360+
\begin{frame}[fragile]{OpenMP Barrier (\texttt{barrier})}
361+
Synchronizes threads explicitly; threads wait at the barrier until all threads arrive.
362+
363+
Syntax:
364+
\begin{verbatim}
365+
#pragma omp barrier
366+
\end{verbatim}
367+
368+
Example:
369+
\lstset{style=CStyle}
370+
\begin{lstlisting}
371+
#pragma omp parallel
372+
{
373+
compute_part1();
374+
375+
#pragma omp barrier // All threads wait here
376+
377+
compute_part2(); // Starts only after all threads
378+
// finish compute_part1()
379+
}
380+
\end{lstlisting}
381+
382+
Barrier ensures correct sequence in parallel regions.
383+
\end{frame}
384+
385+
\begin{frame}[fragile]{Critical Sections (\texttt{critical})}
386+
\textbf{Purpose:}
387+
Ensures only one thread executes a code region at a time, preventing race conditions.
388+
389+
Syntax:
390+
\begin{verbatim}
391+
#pragma omp critical [name]
392+
{
393+
// critical section
394+
}
395+
\end{verbatim}
396+
397+
Example:
398+
\lstset{style=CStyle}
399+
\begin{lstlisting}
400+
#pragma omp parallel
401+
{
402+
#pragma omp critical
403+
{
404+
sum += compute_value();
405+
}
406+
}
407+
\end{lstlisting}
408+
409+
Usage of this directive ensures safe access to the shared variables within block boundaries.
410+
\end{frame}
411+
412+
\begin{frame}[fragile]{Named Critical Sections}
413+
Multiple named critical sections prevent unnecessary waiting.
414+
415+
\textbf{Syntax:}
416+
\begin{verbatim}
417+
#pragma omp critical(name)
418+
{
419+
// named critical section
420+
}
421+
\end{verbatim}
422+
423+
Example:
424+
\lstset{style=CStyle}
425+
\begin{lstlisting}
426+
#pragma omp parallel
427+
{
428+
#pragma omp critical(update_sum)
429+
{
430+
sum += compute_sum();
431+
}
432+
433+
#pragma omp critical(update_max)
434+
{
435+
max_val = max(max_val, compute_val());
436+
}
437+
}
438+
\end{lstlisting}
439+
440+
Different named critical regions do not block each other.
441+
\end{frame}
442+
443+
\begin{frame}[fragile]{Atomic Operations (\texttt{atomic})}
444+
Purpose:
445+
Enforces atomicity of a single memory operation.
446+
447+
Syntax:
448+
\begin{verbatim}
449+
#pragma omp atomic
450+
expression;
451+
\end{verbatim}
452+
453+
Supported operations: \texttt{+, -, *, /, \&, |, \textasciicircum, ++, -{}-}
454+
455+
Example:
456+
\lstset{style=CStyle}
457+
\begin{lstlisting}
458+
#pragma omp parallel for
459+
for(int i = 0; i < N; i++) {
460+
#pragma omp atomic
461+
count += array[i];
462+
}
463+
\end{lstlisting}
464+
465+
It is more efficient than \texttt{critical} for simple arithmetic.
466+
\end{frame}
467+
468+
\begin{frame}{\texttt{critical} vs. \texttt{atomic}}
469+
Key differences between these synchronization methods:
470+
471+
\begin{itemize}
472+
\item Critical Sections:
473+
\begin{itemize}
474+
\item Allows arbitrary blocks of code.
475+
\item More general-purpose, but potentially slower due to locking overhead.
476+
\end{itemize}
477+
478+
\item Atomic Operations:
479+
\begin{itemize}
480+
\item Limited to single, simple memory operations.
481+
\item Faster, uses hardware-level instructions.
482+
\end{itemize}
483+
\end{itemize}
484+
485+
Use \texttt{atomic} for simple operations, \texttt{critical} for more complex sections.
486+
\end{frame}
487+
488+
\section{OpenMP functions}
489+
\begin{frame}[fragile]{OpenMP Functions: Thread Management}
490+
Control and query the number of threads.
491+
492+
Commonly used functions:
493+
\begin{itemize}
494+
\item \texttt{omp\_set\_num\_threads(int n)}
495+
\item \texttt{omp\_get\_num\_threads()}
496+
\item \texttt{omp\_get\_thread\_num()}
497+
\item \texttt{omp\_get\_max\_threads()}
498+
\end{itemize}
499+
500+
Example:
501+
\lstset{style=CStyle}
502+
\begin{lstlisting}
503+
omp_set_num_threads(4);
504+
#pragma omp parallel
505+
{
506+
int tid = omp_get_thread_num();
507+
printf("Hello from thread %d\n", tid);
508+
}
509+
\end{lstlisting}
510+
\end{frame}
511+
512+
\begin{frame}[fragile]{OpenMP Locks}
513+
Purpose:
514+
Provide explicit, fine-grained control of synchronization for critical regions.
515+
516+
API:
517+
\begin{itemize}
518+
\item \texttt{omp\_init\_lock()} — Initializes a lock
519+
\item \texttt{omp\_set\_lock()} — Locks (blocks if unavailable)
520+
\item \texttt{omp\_unset\_lock()} — Releases a lock
521+
\item \texttt{omp\_destroy\_lock()} — Frees lock resources
522+
\end{itemize}
523+
524+
Example:
525+
\lstset{style=CStyle}
526+
\begin{lstlisting}
527+
omp_lock_t lock;
528+
omp_init_lock(&lock);
529+
530+
#pragma omp parallel for
531+
for(int i = 0; i < N; i++) {
532+
omp_set_lock(&lock);
533+
sum += compute(i);
534+
omp_unset_lock(&lock);
535+
}
536+
537+
omp_destroy_lock(&lock);
538+
\end{lstlisting}
539+
540+
Explicit locking provides precise synchronization control.
541+
\end{frame}
542+
543+
\begin{frame}[fragile]{OpenMP Functions: Timing}
544+
Useful functions for measuring execution time:
545+
546+
\begin{itemize}
547+
\item \texttt{omp\_get\_wtime()} — returns elapsed wall-clock time in seconds, measured from an arbitrary fixed point in the past.
548+
\item \texttt{omp\_get\_wtick()} — returns the timer resolution (seconds between successive clock ticks).
549+
\end{itemize}
550+
551+
Example:
552+
\lstset{style=CStyle}
553+
\begin{lstlisting}
554+
double start = omp_get_wtime();
555+
556+
#pragma omp parallel for
557+
for(int i = 0; i < N; i++) {
558+
heavy_computation(i);
559+
}
560+
561+
double end = omp_get_wtime();
562+
printf("Elapsed time: %f seconds\n", end-start);
563+
\end{lstlisting}
564+
\end{frame}
565+
566+
\section{Environment variables}
567+
568+
\begin{frame}{Environment Variables in OpenMP}
569+
Environment variables control OpenMP runtime behavior without recompilation.
570+
571+
Common environment variables include:
572+
\begin{itemize}
573+
\item \texttt{OMP\_NUM\_THREADS}
574+
\item \texttt{OMP\_SCHEDULE}
575+
\item \texttt{OMP\_DYNAMIC}
576+
\item \texttt{OMP\_NESTED}
577+
\end{itemize}
578+
\end{frame}
579+
580+
\begin{frame}[fragile]{\texttt{OMP\_NUM\_THREADS}}
581+
Specifies the default number of threads.
582+
583+
Example usage:
584+
\begin{verbatim}
585+
export OMP_NUM_THREADS=8
586+
./my_program
587+
\end{verbatim}
588+
589+
Overrides the implementation's default thread count; calls to \texttt{omp\_set\_num\_threads()} and the \texttt{num\_threads} clause in the code take precedence over it.
590+
\end{frame}
591+
592+
\begin{frame}[fragile]{\texttt{OMP\_SCHEDULE}}
593+
Sets default scheduling policy for loops with the \texttt{schedule(runtime)} clause.
594+
595+
Syntax:
596+
\begin{verbatim}
597+
export OMP_SCHEDULE="type,chunk"
598+
\end{verbatim}
599+
600+
Example:
601+
\begin{verbatim}
602+
export OMP_SCHEDULE="dynamic,4"
603+
./my_program
604+
\end{verbatim}
605+
606+
Affects loops declared as:
607+
\begin{verbatim}
608+
#pragma omp parallel for schedule(runtime)
609+
\end{verbatim}
610+
\end{frame}
611+
612+
\begin{frame}[fragile]{\texttt{OMP\_DYNAMIC} and \texttt{OMP\_NESTED}}
613+
\texttt{OMP\_DYNAMIC} enables dynamic adjustment of the number of threads by the runtime (true/false).
614+
615+
Example:
616+
\begin{verbatim}
617+
export OMP_DYNAMIC=true
618+
\end{verbatim}
619+
\end{frame}
620+
621+
622+
\begin{frame}[fragile]{\texttt{OMP\_DYNAMIC} and \texttt{OMP\_NESTED}}
623+
\texttt{OMP\_NESTED} allows nested parallelism (true/false); deprecated since OpenMP 5.0 in favor of \texttt{OMP\_MAX\_ACTIVE\_LEVELS}.
624+
625+
Example:
626+
\begin{verbatim}
627+
export OMP_NESTED=true
628+
\end{verbatim}
629+
630+
Nested parallel regions:
631+
\lstset{style=CStyle}
632+
\begin{lstlisting}
633+
#pragma omp parallel num_threads(2)
634+
{
635+
#pragma omp parallel num_threads(2)
636+
{
637+
// Nested region, total 4 threads
638+
}
639+
}
640+
\end{lstlisting}
641+
\end{frame}
642+
643+
\begin{frame}
644+
\centering
645+
{\Huge Thank You!\par}
646+
\end{frame}
647+
648+
\begin{frame}{References}
649+
\begin{itemize}
650+
\item OpenMP Official Specification: \url{https://www.openmp.org/specifications/}
651+
\item OpenMP Reference Guides: \url{https://www.openmp.org/resources/refguides/}
652+
\end{itemize}
653+
\end{frame}
654+
655+
\end{document}

‎07-openmp/07-openmp.toc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
\beamer@sectionintoc {1}{Introduction to OpenMP}{3}{0}{1}
2+
\beamer@sectionintoc {2}{Hello World}{5}{0}{2}
3+
\beamer@sectionintoc {3}{Basic OpenMP Features}{6}{0}{3}
4+
\beamer@sectionintoc {4}{Compiler Directives and Clauses}{8}{0}{4}
5+
\beamer@sectionintoc {5}{Synchronization and Data Sharing}{17}{0}{5}
6+
\beamer@sectionintoc {6}{OpenMP functions}{23}{0}{6}
7+
\beamer@sectionintoc {7}{Environment variables}{26}{0}{7}

0 commit comments

Comments
 (0)
Please sign in to comment.