-
Notifications
You must be signed in to change notification settings - Fork 1
/
partsum_both.c
98 lines (87 loc) · 2.46 KB
/
partsum_both.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <stdlib.h>
#include <stdio.h>
#define LOOPS 100000
#define NOINLINE __attribute__((noinline))
long long NOINLINE partsum(const unsigned *const vec, size_t len, unsigned limit) {
long long sum = 0;
for (int k = 0; k < LOOPS; k++) {
for (size_t i = 0; i < len; i++) {
if (vec[i] >= limit) {
sum += vec[i];
}
}
}
return sum;
}
void NOINLINE random_shuffle(unsigned *const arr, unsigned N) {
unsigned i, j;
unsigned tmp;
for (i = N - 1; i > 0; i--) {
j = (unsigned) (drand48()*(i+1));
tmp = arr[j];
arr[j] = arr[i];
arr[i] = tmp;
}
}
long long NOINLINE expected(unsigned n, unsigned limit) {
unsigned long long N = n;
unsigned long long L = limit;
return (((L+N)*(N-L+1))/2)*LOOPS;
}
int NOINLINE benchmark(unsigned N, unsigned limit, int randomize, const char *name) {
int ret = 0;
long long s, s_expected;
unsigned vec[N];
for (unsigned i = 0; i < N; i++) {
vec[i] = i + 1;
}
if (randomize) {
random_shuffle(vec, N);
}
s = partsum(vec, N, limit);
s_expected = expected(N, limit);
if (s != s_expected) {
fprintf(stderr,
"*** Error: %s: N: %u, limit: %u, expected: %lld, actual: %lld\n",
name, N, limit, s_expected, s);
ret = 1;
}
return ret;
}
int NOINLINE b_part_ordered(unsigned N) {
return benchmark(N, N/2, 0, "part_ordered");
}
int NOINLINE b_all_ordered(unsigned N) {
return benchmark(N, 0, 0, "all_ordered");
}
int NOINLINE b_part_random(unsigned N) {
return benchmark(N, N/2, 1, "part_random");
}
int NOINLINE b_all_random(unsigned N) {
return benchmark(N, 0, 1, "all_random");
}
int main(int argc, char **argv) {
int err = 0;
unsigned N = atoi(argc > 1 ? argv[1] : "");
if (!N) {
N = 5000;
}
err += b_all_ordered(N);
err += b_all_random(N);
err += b_part_ordered(N);
err += b_part_random(N);
return err;
}
/*
* gcc-8 -O0 -g -fno-omit-frame-pointer -static -o partsum_both partsum_both.c
*
* perf record -o bmiss.data -g --call-graph=fp --event=branch-miss ./partsum_both
* perf script -i bmiss.data > partsum_both_branchmiss.txt
* stackcollapse-perf.pl < partsum_both_branchmiss.txt > partsum_both_branchmiss.stacks
* flamegraph.pl --title "Branch misses" partsum_both_branchmiss.stacks > partsum_both_branchmiss.svg
*
* perf record -o cycles.data -g --call-graph=fp ./partsum_both
* perf script -i cycles.data > partsum_both_cycles.txt
* stackcollapse-perf.pl < partsum_both_cycles.txt > partsum_both_cycles.stacks
* flamegraph.pl --title "Cycles" partsum_both_cycles.stacks > partsum_both_cycles.svg
*/