-
Notifications
You must be signed in to change notification settings - Fork 2
/
main51.cpp
77 lines (66 loc) · 2.1 KB
/
main51.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <sys/time.h>

#include <cstdint>
#include <cstdlib>
#include <iostream>
using namespace std;
// Returns the current time reported by gettimeofday() as a double, in
// seconds: the tv_sec field plus the tv_usec field scaled by 1e-6.
// Callers in this file only use the difference between two readings.
double dtime() {
  struct timeval now;
  gettimeofday(&now, nullptr);
  const double wholeSeconds = static_cast<double>(now.tv_sec);
  const double fractional = static_cast<double>(now.tv_usec) * 1.0e-6;
  return wholeSeconds + fractional;
}
// Benchmark driver: a 4-neighbour averaging stencil that ping-pongs between
// two width x height grids of doubles, offloaded with OpenMP target
// directives. Row 0 of both grids is initialised to 1.0 and every other cell
// to 0.0; only interior cells are updated by the sweeps. The elapsed time of
// the sweep loop and two derived throughput figures are written to stdout.
//
// Returns 0 on success, EXIT_FAILURE if the host grids cannot be allocated.
int main(int argc, char **argv) {
  (void)argc;
  (void)argv;

  const int width = 20000;
  const int height = 20000;
  const int iters = 10;
  // Each pass of the timed loop performs two sweeps (B -> A, then A -> B), so
  // the number of sweeps equals `iters` only when `iters` is even. The
  // throughput figures below rely on that.
  static_assert(iters % 2 == 0, "iters must be even: sweeps run in pairs");

  // Do the size arithmetic in size_t so larger grids cannot overflow int.
  const std::size_t cells = static_cast<std::size_t>(width) * height;
  const std::size_t bytes = cells * sizeof(double);

  double *gridA = static_cast<double *>(std::malloc(bytes));
  double *gridB = static_cast<double *>(std::malloc(bytes));
  if (gridA == nullptr || gridB == nullptr) {
    std::cerr << "failed to allocate two grids of " << bytes
              << " bytes each\n";
    std::free(gridA);  // free(nullptr) is a no-op
    std::free(gridB);
    return EXIT_FAILURE;
  }

  // Host-side initialisation: row 0 is 1.0, everything else 0.0.
#pragma omp parallel for
  for (int y = 0; y < height; y++) {
    const double value = (y == 0) ? 1.0 : 0.0;
    const std::size_t row = static_cast<std::size_t>(y) * width;
    for (int x = 0; x < width; x++) {
      gridA[row + x] = value;
      gridB[row + x] = value;
    }
  }

  // Copy both grids to the device once, before the timed region.
#pragma omp target enter data map(to : gridA[:cells], gridB[:cells])

  const double t1 = dtime();
  for (int it = 0; it < iters; it += 2) {
    // Sweep 1: read gridB, write gridA. The index is computed inside the
    // innermost body so the two loops stay perfectly nested for collapse(2).
#pragma omp target teams distribute parallel for collapse(2)
    for (int y = 1; y < height - 1; y++) {
      for (int x = 1; x < width - 1; x++) {
        const std::size_t idx = static_cast<std::size_t>(y) * width + x;
        gridA[idx] = 0.25 * (gridB[idx + 1] + gridB[idx - 1] +
                             gridB[idx + width] + gridB[idx - width]);
      }
    }
    // Sweep 2: read gridA, write gridB.
#pragma omp target teams distribute parallel for collapse(2)
    for (int y = 1; y < height - 1; y++) {
      for (int x = 1; x < width - 1; x++) {
        const std::size_t idx = static_cast<std::size_t>(y) * width + x;
        gridB[idx] = 0.25 * (gridA[idx + 1] + gridA[idx - 1] +
                             gridA[idx + width] + gridA[idx - width]);
      }
    }
  }
  const double t2 = dtime();
  const double dt = t2 - t1;

  // Release the device copies, outside the timed region. Nothing reads the
  // final grids on the host, so they are deleted rather than copied back.
#pragma omp target exit data map(delete : gridA[:cells], gridB[:cells])

  // NOTE(review): the factor 4 in the "GLup/s" figure presumably counts the
  // four neighbour reads (or four flops) per cell -- confirm the intended
  // metric. The GB/s figure counts one 8-byte read and one 8-byte write per
  // cell per sweep.
  const double cellSweeps =
      static_cast<double>(iters) * static_cast<double>(cells);
  std::cout << dt * 1000 << " ms " << 4 * cellSweeps / dt / 1e9
            << " GLup/s "
            << cellSweeps * sizeof(double) * 2 / dt / 1e9
            << " GB/s\n";

  std::free(gridA);
  std::free(gridB);
  return 0;
}