#pragma omp single { printf("single thread\n"); } printf("do thread %d after single\n", omp_get_thread_num()); }
输出
1 2 3 4 5 6 7 8 9
do thread 1 do thread 0 do thread 3 do thread 2 single thread do thread 2 after single do thread 0 after single do thread 1 after single do thread 3 after single
/*
 * Demonstrates the OpenMP `private` clause.
 *
 * k starts at 100 and is then used as the iteration variable of a
 * `parallel for` that lists it in private(k). Every iteration prints its
 * value of k together with the id of the executing thread. After the loop,
 * k is printed once more from the enclosing sequential code.
 */
void ompPrivateVariable(void)
{
    int k = 100;

    /*
     * The directive has to sit on a line of its own: a '#' that is not the
     * first token on a line does not start a preprocessing directive.
     */
#pragma omp parallel for private(k)
    for (k = 10; k < 20; k++) {
        printf("k = %d from thread %d\n", k, omp_get_thread_num());
    }

    /*
     * The loop ran on per-thread private copies of k, so per the OpenMP
     * private-clause rules the k declared above is expected to still hold
     * its initial value here.
     */
    printf("k = %d from outsise\n", k);
}
firstprivate
可以从并行区域外拿到某个变量的副本,但无法修改并行区域外的变量。
lastprivate
退出并行区域时,将该变量在最后一次迭代(按串行执行顺序)中的值赋给并行区域外的原变量。
1 2 3 4 5 6 7 8 9 10
/*
 * Demonstrates combining `firstprivate` and `lastprivate` on one variable.
 *
 * firstprivate(k) gives every thread a private k initialised from the outer
 * value (100). lastprivate(k) copies the private k of the sequentially last
 * iteration (i == 9) back to the outer k when the loop ends. Each iteration
 * adds i to the thread's private k and prints the result with the thread id;
 * the final outer value is printed after the loop.
 */
void ompLastPrivateVar(void)
{
    int k = 100;
    int i = 0;

    /*
     * The directive has to sit on a line of its own: a '#' that is not the
     * first token on a line does not start a preprocessing directive.
     */
#pragma omp parallel for firstprivate(k), lastprivate(k)
    for (i = 0; i < 10; i++) {
        k += i;
        printf("update k = %d from thread %d\n", k, omp_get_thread_num());
    }

    printf("last k = %d\n", k);
}
输出
1 2 3 4 5 6 7 8 9 10 11
update k = 100 from thread 0 update k = 106 from thread 2 update k = 103 from thread 1 update k = 108 from thread 3 update k = 101 from thread 0 update k = 113 from thread 2 update k = 107 from thread 1 update k = 117 from thread 3 update k = 103 from thread 0 update k = 112 from thread 1 last k = 117
语句
reduction
1 2 3 4
#pragma omp parallel for reduction(+: sum) for(i = 0; i < 10; i ++) { sum += i; }
thread 1 sum thread 3 sum thread 2 sum thread 0 sum thread 1 sum thread 3 sum thread 2 sum thread 0 sum thread 1 sum thread 0 sum thread 0 Barrier thread 1 Barrier thread 3 Barrier thread 2 Barrier sum is 90 thread 0 end sum is 180 thread 1 end sum is 360 thread 3 end sum is 720 thread 2 end
nowait
消除一些隐含的barrier。
1 2 3 4 5 6 7 8 9 10 11 12 13
int i = 0; #pragma omp parallel num_threads(4) { #pragma omp for nowait for(i = 0; i < 4; i ++) { printf("thread %d i = %d\n",omp_get_thread_num(), i); } printf("no wait\n"); #pragma omp for nowait for(; i < 7; i++) { printf("s thread %d i = %d\n",omp_get_thread_num(), i); } }
输出,并不会等前一个for循环执行完才去执行下一个循环
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
thread 0 i = 0 no wait s thread 0 i = 0 s thread 0 i = 1 thread 1 i = 1 no wait s thread 1 i = 2 s thread 1 i = 3 thread 2 i = 2 no wait s thread 2 i = 4 s thread 2 i = 5 thread 3 i = 3 no wait s thread 3 i = 6
critical
对临界区变量的保护,同一时间内只能有一个线程访问。
atomic
原子操作
1 2 3 4 5 6
int i = 0, nVar = 0; #pragma omp parallel for shared(nVar) for(i = 0; i < 10; i ++) { #pragma omp atomic nVar ++; }
Thank you for your support, I will continue to work hard!