// gpu_replication_aos.cu
/*************************************************************************
Copyright (C) 2016 Evandro Taquary
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*************************************************************************/
#include <cerrno>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <limits>
#include <vector>
#include <sys/time.h>
#include "modcpy.h"
using namespace std;
/*
 * CHECK(call): evaluates a CUDA runtime call exactly once and terminates the
 * program with EXIT_FAILURE when the call does not return cudaSuccess.
 * The diagnostic (file, line, numeric code and cudaGetErrorString text) is
 * written to stderr so it is not mixed with the program's regular output.
 *
 * The body is wrapped in do { } while (0) so that `CHECK(x);` expands to a
 * single statement and remains correct inside an unbraced if/else. The
 * argument is parenthesized so any expression can be passed, and the local
 * uses a reserved-looking name to avoid shadowing identifiers at the call site.
 */
#define CHECK(call)                                                            \
do {                                                                           \
    const cudaError_t check_err_ = (call);                                     \
    if (check_err_ != cudaSuccess)                                             \
    {                                                                          \
        std::cerr << "Error: " << __FILE__ ": " << __LINE__ << ", ";           \
        std::cerr << "code: " << check_err_ << ", reason: "                    \
                  << cudaGetErrorString(check_err_) << std::endl;              \
        exit(EXIT_FAILURE);                                                    \
    }                                                                          \
} while (0)
/*
 * Array-of-structures element describing one tree node.
 * Three short fields followed by one double; the member order is part of the
 * memory layout that gets copied to and from the device, so it must not change.
 * NOTE(review): parent/child1/child2 presumably hold node indices and branch a
 * branch length -- confirm against the code that consumes these trees.
 */
struct node {
    short  parent, child1, child2;
    double branch;
};
/*
 * Parses a strictly positive decimal integer from a command-line argument.
 * Returns true and stores the value in `out` on success. Returns false when
 * the text is not a number, has trailing characters, does not fit in an int,
 * or is zero/negative.
 */
static bool parsePositiveInt(const char *text, int &out)
{
    char *tail = NULL;
    errno = 0;
    const long value = std::strtol(text, &tail, 10);
    if (tail == text || *tail != '\0' || errno == ERANGE)
        return false;
    if (value <= 0 || value > std::numeric_limits<int>::max())
        return false;
    out = static_cast<int>(value);
    return true;
}

/*
 * Builds a tree of #nodes elements on the host, uploads it, launches modcpy to
 * fill a device buffer of #replications * #nodes elements, times the launch
 * (including the synchronization), copies the result back and checks that every
 * replica equals the source tree field by field.
 *
 * Usage: <program> #nodes #replications   (both strictly positive integers)
 * Exit status: EXIT_SUCCESS when all replicas match, 1 on a data mismatch,
 * EXIT_FAILURE on bad arguments, allocation failure or any CUDA error.
 */
int main(int argc, char *argv[])
{
    int NODS = 0;
    int REPS = 0;
    if (argc != 3 || !parsePositiveInt(argv[1], NODS) || !parsePositiveInt(argv[2], REPS)) {
        cout << "Usage: " << argv[0] << " #nodes #replications" << endl;
        return EXIT_FAILURE;
    }

    // Sizes are computed in size_t and checked so that a large #nodes * #replications
    // cannot silently wrap and under-allocate the buffers.
    const size_t treeBytes = static_cast<size_t>(NODS) * sizeof(node);
    if (static_cast<size_t>(REPS) > std::numeric_limits<size_t>::max() / treeBytes) {
        cout << "Error: requested replication size is too large" << endl;
        return EXIT_FAILURE;
    }
    const size_t repBytes   = treeBytes * static_cast<size_t>(REPS);
    const size_t totalNodes = static_cast<size_t>(NODS) * static_cast<size_t>(REPS);

    cudaDeviceProp device;
    CHECK(cudaGetDeviceProperties(&device, 0));

    // Host buffers live on the heap: a stack array sized by user input is not
    // standard C++ and overflows the stack for large trees.
    std::vector<node> tree;
    std::vector<node> h_replics;
    try {
        tree.resize(NODS);
        h_replics.resize(totalNodes);
    } catch (const std::exception &e) {
        cout << "Error: host allocation failed: " << e.what() << endl;
        return EXIT_FAILURE;
    }

    // Every field of node i carries the value i (narrowed to short for the
    // index fields), which gives the verification step a known pattern.
    for (int i = 0; i < NODS; i++) {
        tree[i].parent = static_cast<short>(i);
        tree[i].child1 = static_cast<short>(i);
        tree[i].child2 = static_cast<short>(i);
        tree[i].branch = i;
    }

    // Launch configuration: 32 warps per block, one thread per int4-sized chunk
    // of the destination. The block count is rounded UP so that a destination
    // smaller than one block still gets a block and a partial last block is covered.
    // NOTE(review): rounding up assumes modcpy does not write past repBytes
    // (it receives repBytes as an argument) -- confirm in modcpy.h.
    // NOTE(review): repBytes / sizeof(int4) drops a remainder if sizeof(node)
    // is not a multiple of sizeof(int4) -- confirm for the target ABI.
    const int    blockSize    = device.warpSize * 32;
    const size_t vecCount     = repBytes / sizeof(int4);
    const size_t blocksNeeded = (vecCount + blockSize - 1) / blockSize;
    if (blocksNeeded > static_cast<size_t>(device.maxGridSize[0])) {
        cout << "Error: workload needs more blocks than the device grid allows" << endl;
        return EXIT_FAILURE;
    }
    const dim3 grid(static_cast<unsigned int>(blocksNeeded));
    const dim3 block(blockSize);

    node *d_tree   = NULL;
    node *d_replic = NULL;
    CHECK(cudaMalloc((void **) &d_tree, treeBytes));
    CHECK(cudaMemcpy(d_tree, &tree[0], treeBytes, cudaMemcpyHostToDevice));
    CHECK(cudaMalloc((void **) &d_replic, repBytes));

    /*******************************PARALLEL MEASUREMENT*******************************/
    struct timeval begin, end;
    gettimeofday(&begin, NULL);
    modcpy<<<grid, block>>>(d_replic, d_tree, repBytes, treeBytes);
    CHECK(cudaGetLastError());        // reports an invalid launch configuration
    CHECK(cudaDeviceSynchronize());   // waits for the kernel and reports execution errors
    gettimeofday(&end, NULL);
    const double time_spent = (double) (end.tv_usec - begin.tv_usec)/1000 + (end.tv_sec - begin.tv_sec)*1000;
    cout << "Time spent:\t" << time_spent << "ms " << endl;
    /*******************************PARALLEL MEASUREMENT*******************************/

    CHECK(cudaMemcpy(&h_replics[0], d_replic, repBytes, cudaMemcpyDeviceToHost));

    // Compare each replica against the source tree itself rather than against
    // the loop index: the short fields wrap once #nodes exceeds the short range,
    // and comparing with the index would then report a false mismatch.
    bool matches = true;
    for (size_t r = 0; matches && r < static_cast<size_t>(REPS); r++) {
        const node *replica = &h_replics[r * static_cast<size_t>(NODS)];
        for (int j = 0; j < NODS; j++) {
            if (replica[j].parent != tree[j].parent ||
                replica[j].child1 != tree[j].child1 ||
                replica[j].child2 != tree[j].child2 ||
                replica[j].branch != tree[j].branch) {
                matches = false;
                break;
            }
        }
    }

    if (matches)
        cout << "Data does match!" << endl;
    else
        cout << "Data doesn't match!" << endl;

    CHECK(cudaFree(d_replic));
    CHECK(cudaFree(d_tree));
    CHECK(cudaDeviceReset());

    return matches ? EXIT_SUCCESS : 1;
}