This repository has been archived by the owner on May 23, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 13
/
nvcc_wrapper.in
executable file
·477 lines (427 loc) · 13 KB
/
nvcc_wrapper.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building legacy C or C++ code with CUDA enabled.
# The script remedies some differences between the interface of NVCC
# and that of the host compiler, in particular for linking.
# It also means that a legacy code doesn't need separate .cu files;
# it can just use .cpp files.
#
# Default settings: change those according to your machine. For
# example, you may have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overwritten
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
# GPU architecture passed to nvcc (-arch=...) when the command line
# does not select one itself.
default_arch="sm_35"
#default_arch="sm_50"
#
# The default C++ compiler: a non-empty KOKKOS_CXX environment variable
# takes precedence, otherwise the value configured into this template
# is used.
#
if [[ -n "${KOKKOS_CXX}" ]]; then
  host_compiler=${KOKKOS_CXX}
else
  host_compiler=@KOKKOS_CXX@
fi
#host_compiler="icpc"
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
#
# Internal variables
#
# Every bucket the argument parser appends to is reset here, so that a
# same-named variable inherited from the environment can never end up on
# a compiler command line.
#
# C++ files
cpp_files=""
# Host compiler arguments (comma separated, as handed to -Xcompiler)
xcompiler_args=""
# Cuda (NVCC) only arguments
cuda_args=""
# Arguments for both NVCC and Host compiler
shared_args=""
# Argument -c
compile_arg=""
# Argument -o <obj>
output_arg=""
# Linker arguments, each one prefixed with -Xlinker for NVCC
xlinker_args=""
# The same linker arguments without the -Xlinker prefix, for the host only
# command. This one used to be left uninitialised although it is appended to
# and read later on.
host_linker_args=""
# Object files passable to NVCC
object_files=""
# Link objects for the host linker only
object_files_xlinker=""
# Shared libraries with version numbers are not handled correctly by NVCC
shared_versioned_libraries_host=""
shared_versioned_libraries=""
# Does the user set the architecture
arch_set=0
# Does the user overwrite the host compiler
ccbin_set=0
# Error code of compilation
error_code=0
# Do a dry run without actually compiling
dry_run=0
# Skip NVCC compilation and use host compiler directly
host_only=0
host_only_args=""
# Just run version on host compiler
get_host_version=0
# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0
# Mark first host compiler argument (1 until something was put into xcompiler_args)
first_xcompiler_arg=1
# Directory for temporary files: TMPDIR if set and non-empty, /tmp otherwise
temp_dir=${TMPDIR:-/tmp}
# optimization flag added as a command-line argument
optimization_flag=""
# std standard flag added as a command-line argument
std_flag=""
# Run nvcc a second time to generate dependencies if needed
depfile_separate=0
depfile_output_arg=""
depfile_target_arg=""
# Option to remove duplicate libraries and object files
remove_duplicate_link_files=0
# Print the notice used whenever more than one C++ standard flag is given:
# only the last one is kept. Takes no arguments; writes one line to stdout.
warn_std_flag() {
  printf '%s\n' "nvcc_wrapper - *warning* you have set multiple standard flags (-std=c++1* or --std=c++1*), only the last is used because nvcc can only accept a single std setting"
}
#echo "Arguments: $# $@"
#
# parse_wrapper_arguments ARG...
#
# Walks over the given arguments and sorts each one into the variable it
# belongs to: wrapper switches set their flag, source files go to cpp_files,
# nvcc-only options to cuda_args, options understood by both compilers to
# shared_args, linker input to xlinker_args / host_linker_args /
# object_files*, and everything unknown to xcompiler_args (comma separated).
# Options that take a separate value consume the following argument as well.
#
# The function works on its own positional parameters, so the script's
# argument list is left untouched by the call below; the code after the call,
# as far as visible in this file, does not read it.
#
# case takes the FIRST arm whose pattern matches. An arm for a bare option
# that consumes a value (e.g. "-arch sm_70") therefore has to come before any
# wildcard arm that also matches the bare option (e.g. "-arch*"), otherwise
# the value falls through to the catch-all arm and is handed to the host
# compiler.
parse_wrapper_arguments() {
  while [ $# -gt 0 ]
  do
    case $1 in
    #show the executed command
    --show|--nvcc-wrapper-show)
      dry_run=1
      ;;
    #run host compilation only
    --host-only)
      host_only=1
      ;;
    #get the host version only
    --host-version)
      get_host_version=1
      ;;
    #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
    --replace-pragma-ident)
      replace_pragma_ident=1
      ;;
    #remove duplicate link files
    --remove-duplicate-link-files)
      remove_duplicate_link_files=1
      ;;
    #handle source files to be compiled as cuda files
    *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
      cpp_files="$cpp_files $1"
      ;;
    # Ensure we only have one optimization flag because NVCC doesn't allow multiple
    -O*)
      if [ -n "$optimization_flag" ]; then
        echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the last is used because nvcc can only accept a single optimization setting."
        # drop the previously recorded flag so only the newest one remains
        shared_args=${shared_args/ $optimization_flag/}
      fi
      # a bare -O is recorded as -O2
      if [ "$1" = "-O" ]; then
        optimization_flag="-O2"
      else
        optimization_flag=$1
      fi
      shared_args="$shared_args $optimization_flag"
      ;;
    #Handle shared args (valid for both nvcc and the host compiler)
    -D*)
      # Turn "\," back into "," and shell-quote the definition with printf %q.
      # The backticks are kept on purpose: inside them the shell reduces the
      # three backslashes to two before sed runs, so sed matches one literal
      # backslash followed by a comma.
      unescape_commas=`echo "$1" | sed -e 's/\\\,/,/g'`
      arg=`printf "%q" $unescape_commas`
      shared_args="$shared_args $arg"
      ;;
    -I*|-L*|-l*|-g|--help|--version|-E|-M|-shared|-w)
      shared_args="$shared_args $1"
      ;;
    #Handle compilation argument
    -c)
      compile_arg="$1"
      ;;
    #Handle output argument
    -o)
      output_arg="$output_arg $1 $2"
      shift
      ;;
    # Handle depfile arguments. We map them to a separate call to nvcc.
    -MD|-MMD)
      depfile_separate=1
      host_only_args="$host_only_args $1"
      ;;
    -MF)
      depfile_output_arg="-o $2"
      host_only_args="$host_only_args $1 $2"
      shift
      ;;
    -MT)
      depfile_target_arg="$1 $2"
      host_only_args="$host_only_args $1 $2"
      shift
      ;;
    #Handle known nvcc args that have an argument. This arm has to stay in
    #front of the wildcard arm below: "--fmad*" and "-Xptxas*" also match the
    #bare option and would leave its value for the host compiler.
    -rdc|-maxrregcount|--default-stream|-Xnvlink|--fmad|-Xptxas)
      cuda_args="$cuda_args $1 $2"
      shift
      ;;
    #Handle known nvcc args
    --dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*|--fmad*)
      cuda_args="$cuda_args $1"
      ;;
    #Handle more known nvcc args
    --expt-extended-lambda|--expt-relaxed-constexpr)
      cuda_args="$cuda_args $1"
      ;;
    -rdc=*|-maxrregcount*|--maxrregcount*)
      cuda_args="$cuda_args $1"
      ;;
    #Handle unsupported standard flags
    --std=c++1y|-std=c++1y|--std=c++1z|-std=c++1z|--std=gnu++1y|-std=gnu++1y|--std=gnu++1z|-std=gnu++1z|--std=c++2a|-std=c++2a|--std=c++17|-std=c++17)
      fallback_std_flag="-std=c++14"
      # this is hopefully just occurring in a downstream project during CMake feature tests
      # we really have no choice here but to accept the flag and change to an accepted C++ standard
      echo "nvcc_wrapper does not accept standard flags $1 since partial standard flags and standards after C++14 are not supported. nvcc_wrapper will use $fallback_std_flag instead. It is undefined behavior to use this flag. This should only be occurring during CMake configuration."
      if [ -n "$std_flag" ]; then
        warn_std_flag
        shared_args=${shared_args/ $std_flag/}
      fi
      std_flag=$fallback_std_flag
      shared_args="$shared_args $std_flag"
      ;;
    -std=gnu*)
      # replace the first "gnu" in the flag with "c" (-std=gnu++11 -> -std=c++11)
      corrected_std_flag=${1/gnu/c}
      echo "nvcc_wrapper has been given GNU extension standard flag $1 - reverting flag to $corrected_std_flag"
      if [ -n "$std_flag" ]; then
        warn_std_flag
        shared_args=${shared_args/ $std_flag/}
      fi
      std_flag=$corrected_std_flag
      shared_args="$shared_args $std_flag"
      ;;
    --std=c++11|-std=c++11|--std=c++14|-std=c++14)
      if [ -n "$std_flag" ]; then
        warn_std_flag
        shared_args=${shared_args/ $std_flag/}
      fi
      std_flag=$1
      shared_args="$shared_args $std_flag"
      ;;
    #strip off -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
    -std=c++98|--std=c++98)
      ;;
    #strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
    -pedantic|-Wpedantic|-ansi)
      ;;
    #strip off -Woverloaded-virtual to avoid "cc1: warning: command line option '-Woverloaded-virtual' is valid for C++/ObjC++ but not for C"
    -Woverloaded-virtual)
      ;;
    #strip -Xcompiler because we add it
    -Xcompiler)
      if [ $first_xcompiler_arg -eq 1 ]; then
        xcompiler_args="$2"
        first_xcompiler_arg=0
      else
        xcompiler_args="$xcompiler_args,$2"
      fi
      shift
      ;;
    #strip off "-x cu" because we add that
    -x)
      if [[ $2 != "cu" ]]; then
        if [ $first_xcompiler_arg -eq 1 ]; then
          xcompiler_args="-x,$2"
          first_xcompiler_arg=0
        else
          xcompiler_args="$xcompiler_args,-x,$2"
        fi
      fi
      shift
      ;;
    #Handle -+ (same as -x c++, specifically used for xl compilers, but mutually exclusive with -x. So replace it with -x c++)
    -+)
      if [ $first_xcompiler_arg -eq 1 ]; then
        xcompiler_args="-x,c++"
        first_xcompiler_arg=0
      else
        xcompiler_args="$xcompiler_args,-x,c++"
      fi
      ;;
    #Handle -ccbin (if its not set we can set it to a default value)
    -ccbin)
      cuda_args="$cuda_args $1 $2"
      ccbin_set=1
      host_compiler=$2
      shift
      ;;
    #Handle -arch argument (if its not set use a default) this is the version without = sign.
    #It has to precede the "-arch*|-gencode*" arm, which matches the bare option too.
    -arch|-gencode)
      cuda_args="$cuda_args $1 $2"
      arch_set=1
      shift
      ;;
    #Handle -code argument (if its not set use a default) this is the version without = sign.
    #It has to precede the "-code*" arm for the same reason.
    -code)
      cuda_args="$cuda_args $1 $2"
      shift
      ;;
    #Handle -arch argument (if its not set use a default) this is the version with = sign
    -arch*|-gencode*)
      cuda_args="$cuda_args $1"
      arch_set=1
      ;;
    #Handle -code argument (if its not set use a default) this is the version with = sign
    -code*)
      cuda_args="$cuda_args $1"
      ;;
    #Handle -Xcudafe argument
    -Xcudafe)
      cuda_args="$cuda_args -Xcudafe $2"
      shift
      ;;
    #Handle -Xlinker argument
    -Xlinker)
      xlinker_args="$xlinker_args -Xlinker $2"
      shift
      ;;
    #Handle args that should be sent to the linker
    -Wl,*)
      # ${1:4:${#1}} is the argument without its leading "-Wl,"
      xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
      host_linker_args="$host_linker_args ${1:4:${#1}}"
      ;;
    #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
    *.a|*.so|*.o|*.obj)
      object_files="$object_files $1"
      object_files_xlinker="$object_files_xlinker -Xlinker $1"
      ;;
    #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
    @*|*.dylib)
      object_files="$object_files -Xlinker $1"
      object_files_xlinker="$object_files_xlinker -Xlinker $1"
      ;;
    #Handle shared libraries with *.so.* names which nvcc can't do.
    *.so.*)
      shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
      shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
      ;;
    #All other args are sent to the host compiler
    *)
      if [ $first_xcompiler_arg -eq 1 ]; then
        xcompiler_args=$1
        first_xcompiler_arg=0
      else
        xcompiler_args="$xcompiler_args,$1"
      fi
      ;;
    esac
    shift
  done
}
parse_wrapper_arguments "$@"
# --host-version was requested: let the host compiler report its version
# and stop right here, passing its exit status on to the caller.
if [ "$get_host_version" -eq 1 ]; then
  $host_compiler --version
  exit $?
fi
#Remove duplicate object files
#
# dedup_object_files
#
# Rewrites the global object_files so that every link item appears only once.
# When an item is repeated, its LAST occurrence is the one that stays, and the
# relative order of the surviving items is preserved. A "-Xlinker <file>" pair
# is treated as one item, so the prefix is never separated from the file it
# belongs to. Each dropped duplicate is reported on stdout as "Exists: <item>".
# The result has the same shape as before: every item followed by one space.
dedup_object_files() {
  local -a link_items=() kept_items=()
  local obj obj2 exists idx
  local xlinker_prefix=""

  # Group the words of object_files into link items. The expansion is
  # deliberately unquoted: the list is a whitespace separated string.
  for obj in $object_files
  do
    if [ "$obj" == "-Xlinker" ]; then
      # remember the prefix and attach it to the next word
      xlinker_prefix="-Xlinker "
      continue
    fi
    link_items+=("$xlinker_prefix$obj")
    xlinker_prefix=""
  done

  # Walk from the last item to the first so that the last occurrence wins;
  # prepending each kept item restores the original order.
  object_files=""
  for (( idx = ${#link_items[@]} - 1; idx >= 0; idx-- ))
  do
    obj=${link_items[idx]}
    exists=false
    for obj2 in "${kept_items[@]}"
    do
      if [ "$obj" == "$obj2" ]; then
        exists=true
        echo "Exists: $obj"
        break
      fi
    done
    if [ "$exists" == "false" ]; then
      kept_items+=("$obj")
      object_files="$obj $object_files"
    fi
  done
}
if [ $remove_duplicate_link_files -eq 1 ]; then
  dedup_object_files
fi
# No -ccbin on the command line: point nvcc at the default host compiler.
if [ "$ccbin_set" -ne 1 ]; then
  cuda_args+=" -ccbin $host_compiler"
fi
# No architecture on the command line: fall back to the default one.
if [ "$arch_set" -ne 1 ]; then
  cuda_args+=" -arch=$default_arch"
fi
# Start of the nvcc command line; the host compiler arguments are only added
# (as one comma separated -Xcompiler value) when at least one was collected.
nvcc_command="@CMAKE_CUDA_COMPILER@ ${cuda_args} ${shared_args} ${xlinker_args} ${shared_versioned_libraries}"
if [ "$first_xcompiler_arg" -eq 0 ]; then
  nvcc_command+=" -Xcompiler $xcompiler_args"
fi
# The host only command takes the same arguments separated by spaces
# instead of commas.
xcompiler_args=${xcompiler_args//,/ }
# Start of the host only command line: eight fields joined by single spaces.
printf -v host_command '%s %s %s %s %s %s %s %s' \
  "$host_compiler" "$shared_args" "$host_only_args" "$compile_arg" \
  "$output_arg" "$xcompiler_args" "$host_linker_args" \
  "$shared_versioned_libraries_host"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
#
# rewrite_pragma_ident_sources
#
# For every file listed in the global cpp_files that has a line containing
# "pragma", "ident" and "#", writes a copy with '#pragma ident' turned into
# '#ident' to $temp_dir and lists that copy in cpp_files instead of the
# original. Files without such a line are kept as they are.
#
# The copy is named nvcc_wrapper_tmp_<path>, where every "/" of the source
# path is replaced by "_". Using the path verbatim, as was done before, points
# into a subdirectory of $temp_dir that does not exist as soon as the source is
# given with a directory component. Sources without a directory component keep
# exactly the name they had before.
rewrite_pragma_ident_sources() {
  local file var patched
  local cpp_files2=""
  # cpp_files is a whitespace separated list, hence the unquoted expansion
  for file in $cpp_files
  do
    var=$(grep pragma "$file" | grep ident | grep "#")
    if [ "${#var}" -gt 0 ]
    then
      patched="$temp_dir/nvcc_wrapper_tmp_${file//\//_}"
      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' "$file" > "$patched"
      cpp_files2="$cpp_files2 $patched"
    else
      cpp_files2="$cpp_files2 $file"
    fi
  done
  cpp_files=$cpp_files2
  #echo $cpp_files
}
if [ $replace_pragma_ident -eq 1 ]; then
  rewrite_pragma_ident_sources
fi
# Append the inputs. With sources present nvcc gets the object files through
# -Xlinker followed by "-x cu" and the sources; without sources it gets the
# object files directly. The host only command always gets the plain object
# files, followed by the sources if there are any.
if [ -n "$cpp_files" ]; then
  nvcc_command+=" $object_files_xlinker -x cu $cpp_files"
  host_command+=" $object_files $cpp_files"
else
  nvcc_command+=" $object_files"
  host_command+=" $object_files"
fi
# When a depfile was requested, nvcc is run a second time with -M to generate
# the dependencies (without compiling). That command is derived from the nvcc
# command as it stands now, i.e. before -c and -o are appended below.
nvcc_depfile_command=""
if [ "$depfile_separate" -eq 1 ]; then
  nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg"
fi
nvcc_command+=" $compile_arg $output_arg"
# Dry run: print the command line that would be executed and leave with
# status 0. The expansions are unquoted on purpose, so the words of a command
# are printed separated by single spaces.
if [ "$dry_run" -eq 1 ]; then
  if [ "$host_only" -eq 1 ]; then
    echo $host_command
  elif [ -z "$nvcc_depfile_command" ]; then
    echo $nvcc_command
  else
    echo $nvcc_command "&&" $nvcc_depfile_command
  fi
  exit 0
fi
# Run the selected command and leave with its exit status.
# Setting NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN=1 in the environment prints the
# command before it is executed.
show_commands=0
if [ "$NVCC_WRAPPER_SHOW_COMMANDS_BEING_RUN" == "1" ]; then
  show_commands=1
fi
if [ "$host_only" -eq 1 ]; then
  # host compiler only
  if [ "$show_commands" -eq 1 ]; then
    echo "$host_command"
  fi
  $host_command
  error_code=$?
elif [ -n "$nvcc_depfile_command" ]; then
  # compile first; generate the dependencies only if that succeeded
  if [ "$show_commands" -eq 1 ]; then
    echo "$nvcc_command && $nvcc_depfile_command"
  fi
  $nvcc_command && $nvcc_depfile_command
  error_code=$?
else
  if [ "$show_commands" -eq 1 ]; then
    echo "$nvcc_command"
  fi
  $nvcc_command
  error_code=$?
fi
#Report error code
exit $error_code